Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
529 changes: 529 additions & 0 deletions BACKUP/ground_truth/comparison.html

Large diffs are not rendered by default.

305 changes: 305 additions & 0 deletions BACKUP/ground_truth/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,305 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RsMetaCheck Pitfalls Report</title>
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background-color: #ffffff;
color: #333333;
margin: 0;
padding: 20px;
}
h1 {
text-align: center;
color: #2c3e50;
margin-bottom: 30px;
}
.table-container {
max-width: 95%;
margin: 0 auto;
overflow-x: auto;
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
border-radius: 8px;
}
table {
width: 100%;
border-collapse: collapse;
background-color: #fff;
}
th, td {
padding: 6px 10px;
text-align: left;
border-bottom: 1px solid #e0e0e0;
word-wrap: break-word;
word-break: break-word;
max-width: 350px;
}
th {
background-color: #f8f9fa;
font-weight: 600;
color: #2c3e50;
position: sticky;
top: 0;
z-index: 10;
}
/* The page script creates one <tbody> per repository, so hovering anywhere in
   a repository's group highlights all of its finding rows together. */
tbody:hover {
background-color: #f5f5f5;
}
.repo-link {
color: #3498db;
text-decoration: none;
word-break: break-all;
}
.repo-link:hover {
text-decoration: underline;
}
.code-badge {
display: inline-block;
padding: 4px 8px;
border-radius: 4px;
font-size: 0.85em;
font-weight: bold;
}
.code-p {
background-color: #ffebee;
color: #c0392b;
border: 1px solid #ffcdd2;
}
.code-w {
background-color: #fff8e1;
color: #e67e22;
border: 1px solid #ffecb3;
}
.desc-text {
font-size: 0.9em;
color: #666;
margin-top: 4px;
}
/* NOTE(review): .cell-only-030, .cell-only-032 and .cell-missing are not
   referenced by the markup or the script in this file; presumably they are
   shared with comparison.html. Confirm before removing.
   NOTE(review): the "032" suffix does not match the 0.3.0 / 0.3.1 versions
   named elsewhere on this page. Verify the intended version. */
.cell-only-030 {
background-color: #ffebee !important;
}
.cell-only-032 {
background-color: #e8f5e9 !important;
}
/* Muted, centered, italic text style. */
.cell-missing {
color: #bbb;
font-style: italic;
text-align: center;
}
.section-divider {
border: none;
border-top: 3px solid #e0e0e0;
max-width: 95%;
margin: 40px auto 20px auto;
}
.section-title {
text-align: center;
color: #2c3e50;
margin: 0 auto 20px auto;
}
.nav {
text-align: center;
margin-bottom: 30px;
}
.nav a {
color: #3498db;
text-decoration: none;
font-size: 0.95em;
padding: 6px 14px;
border: 1px solid #3498db;
border-radius: 4px;
}
.nav a:hover {
background-color: #3498db;
color: #fff;
}
</style>
</head>
<body>

<div class="nav">
<a href="comparison.html">0.3.0 vs 0.3.1 Comparison &rarr;</a>
</div>

<h1>Ground Truth Report</h1>

<p style="text-align: center; color: #555; font-size: 0.95em; line-height: 1.5; max-width: 800px; margin: 0 auto 20px auto;">
This ground truth study evaluated 84 software repositories from the OpenAIRE Graph.
All repositories contain a <code>codemeta.json</code> file and were examined across
multiple metadata sources including package manifests (e.g., <code>setup.py</code>,
<code>package.json</code>, <code>DESCRIPTION</code>), citation files
(<code>CITATION.cff</code>), licensing files, README documentation, and other
structured or semi-structured metadata artifacts.
</p>

<p id="summary-text" style="text-align: center; color: #444; font-size: 1.05em; line-height: 1.4;"></p>

<div class="table-container">
<table id="pitfallsTable">
<thead>
<tr>
<th>Repository</th>
<th>Commit ID</th>
<th>Pitfall / Warning Code</th>
<th>Description</th>
<th>Source File</th>
</tr>
</thead>
</table>
</div>

<script>
// Once the DOM is ready, load the ground-truth findings together with the two
// tool-result summaries and render the table for the repositories that are
// present in all three data sets.
document.addEventListener("DOMContentLoaded", () => {
  const gtTable = document.querySelector('#pitfallsTable');

  // fetch() only rejects on network failure; an HTTP 404/500 still resolves.
  // Check `ok` explicitly so a missing file is reported as such instead of
  // surfacing as a JSON parse error (or being silently accepted as data).
  const fetchJson = (path) => fetch(path).then((response) => {
    if (!response.ok) {
      throw new Error(`Request for ${path} failed with HTTP ${response.status}`);
    }
    return response.json();
  });

  Promise.all([
    fetchJson('summary_pitfalls_warnings.json'),
    fetchJson('summary_0_3_0.json'),
    fetchJson('summary_0_3_1.json')
  ])
    .then(([gtData, data030, data031]) => {
      // A repository is identified by the last two path segments of its URL
      // (trailing slash ignored), e.g. "owner/name".
      const gtNames = new Set();
      for (const entry of Object.values(gtData)) {
        const url = (entry.url || '').replace(/\/$/, "").split("/");
        if (url.length >= 2) gtNames.add(url.slice(-2).join("/"));
      }

      // Keep only the repositories that also have an entry in both summaries.
      const allThree = new Set(
        [...gtNames].filter(r => data030[r] && data031[r])
      );

      buildGroundTruthTable(gtTable, gtData, data030, data031, allThree);
    })
    .catch(err => {
      console.error('Failed to load data:', err);
      // Append the error row without re-serialising the existing table
      // content (which `innerHTML +=` would do).
      gtTable.insertAdjacentHTML(
        'beforeend',
        '<tbody><tr><td colspan="5" style="text-align:center;color:red;">Error loading data.</td></tr></tbody>'
      );
    });
});

/**
 * Renders the ground-truth findings into `table`, one <tbody> per repository.
 *
 * A ground-truth finding is shown only when the same code was also reported
 * for that repository by at least one of the two result sets. Repositories
 * that are not in `allThree`, or that end up with no matching finding, are
 * skipped. Warning code "W003" is never shown.
 *
 * @param {HTMLTableElement} table - Table that receives the generated <tbody> elements.
 * @param {Object} gtData - Ground-truth entries; each has `url`, and optionally `pitfalls` / `warnings` maps keyed by code.
 * @param {Object} data030 - First result set, keyed by "owner/name".
 * @param {Object} data031 - Second result set, keyed by "owner/name"; preferred when both report a code.
 * @param {Set<string>} allThree - Repository names present in all three data sets.
 */
function buildGroundTruthTable(table, gtData, data030, data031, allThree) {
  // Placeholder source used by the ground truth when no specific file is named.
  const GENERIC_SOURCE = 'Metadata files (codemeta.json, setup.py, pom.xml etc...)';

  // Returns the ground-truth findings (by code) that were also reported in
  // either result set. A generic source file is replaced by the concrete
  // one from the reporting result set when that is available.
  const collectMatched = (gtFindings, findings031, findings030, skippedCode) => {
    const matched = {};
    Object.entries(gtFindings || {}).forEach(([code, info]) => {
      if (code === skippedCode) return;

      const reported =
        (findings031 && findings031[code]) || (findings030 && findings030[code]);
      if (!reported) return;

      const usesGenericSource = info.source_file === GENERIC_SOURCE;
      matched[code] = {
        source_file:
          usesGenericSource && reported.source_file ? reported.source_file : info.source_file,
        description: info.description,
      };
    });
    return matched;
  };

  for (const repoData of Object.values(gtData)) {
    const url = repoData.url || 'Unknown URL';
    const repoName = url.replace(/\/$/, "").split("/").slice(-2).join("/");
    if (!allThree.has(repoName)) continue;

    const res030 = data030[repoName] || {};
    const res031 = data031[repoName] || {};

    const pitfalls = collectMatched(repoData.pitfalls, res031.pitfalls, res030.pitfalls);
    const warnings = collectMatched(repoData.warnings, res031.warnings, res030.warnings, 'W003');

    // Pitfalls first, then warnings, each tagged with its badge type.
    const rows = [
      ...Object.entries(pitfalls).map(([code, info]) => ({ code, info, type: 'p' })),
      ...Object.entries(warnings).map(([code, info]) => ({ code, info, type: 'w' })),
    ];
    if (rows.length === 0) continue;

    const group = document.createElement('tbody');
    rows.forEach(({ code, info, type }, index) => {
      // Only the first row of a group carries the row-spanning repo/commit cells.
      addGTRow(group, url, repoName, code, info, type, index === 0, rows.length, repoData, allThree);
    });
    table.appendChild(group);
  }
}

/**
 * Appends one finding row to a repository's <tbody>.
 *
 * The first row of a group additionally gets the repository link cell and the
 * commit link cell, both spanning `totalRows` rows. Every row gets the code
 * badge, the description and the source file.
 *
 * @param {HTMLElement} tbody - Row group the new <tr> is appended to.
 * @param {string} url - Repository URL (a trailing slash is tolerated).
 * @param {string} repoName - Text shown for the repository link.
 * @param {string} code - Pitfall or warning code shown in the badge.
 * @param {{description?: string, source_file?: string}} info - Finding details.
 * @param {string} type - Badge variant; becomes the CSS class `code-<type>` (callers pass 'p' or 'w').
 * @param {boolean} isFirstContext - True for the first row of the repository group.
 * @param {number} totalRows - Number of rows in the group (row span of the shared cells).
 * @param {Object} repoData - Ground-truth entry; its 'commit ID' property is read.
 * @param {Set<string>} allThree - Unused here; kept for call compatibility.
 */
function addGTRow(tbody, url, repoName, code, info, type, isFirstContext, totalRows, repoData, allThree) {
  const tr = document.createElement('tr');

  if (isFirstContext) {
    const tdRepo = document.createElement('td');
    const aRepo = document.createElement('a');
    aRepo.href = url;
    aRepo.textContent = repoName;
    aRepo.className = 'repo-link';
    aRepo.target = '_blank';
    // Never hand the opened page a reference back to this window.
    aRepo.rel = 'noopener noreferrer';
    tdRepo.appendChild(aRepo);
    tdRepo.rowSpan = totalRows;
    tr.appendChild(tdRepo);

    const tdCommit = document.createElement('td');
    tdCommit.style.whiteSpace = 'nowrap';
    const commitId = repoData['commit ID'];
    if (commitId) {
      const aCommit = document.createElement('a');
      const cleanUrl = url.endsWith('/') ? url.slice(0, -1) : url;
      // GitLab serves trees under "/-/tree/", GitHub under "/tree/".
      const treePath = isGitLabRepoUrl(cleanUrl) ? '/-/tree/' : '/tree/';
      aCommit.href = `${cleanUrl}${treePath}${commitId}`;
      // Show the conventional 7-character short hash.
      aCommit.textContent = commitId.substring(0, 7);
      aCommit.className = 'repo-link';
      aCommit.style.wordBreak = 'normal';
      aCommit.target = '_blank';
      aCommit.rel = 'noopener noreferrer';
      tdCommit.appendChild(aCommit);
    } else {
      tdCommit.textContent = 'Unknown';
    }
    tdCommit.rowSpan = totalRows;
    tr.appendChild(tdCommit);
  }

  const tdCode = document.createElement('td');
  const spanCode = document.createElement('span');
  spanCode.textContent = code;
  spanCode.className = `code-badge code-${type}`;
  tdCode.appendChild(spanCode);

  const tdDesc = document.createElement('td');
  // The placeholder text "No description available" is rendered as an empty cell.
  if (info.description && info.description !== "No description available") {
    tdDesc.textContent = info.description;
    tdDesc.className = 'desc-text';
  } else {
    tdDesc.textContent = '';
  }

  const tdSource = document.createElement('td');
  tdSource.textContent = info.source_file || 'Unknown';

  tr.appendChild(tdCode);
  tr.appendChild(tdDesc);
  tr.appendChild(tdSource);

  tbody.appendChild(tr);
}

/**
 * Tells whether a repository URL points at a GitLab host.
 *
 * Only the hostname is inspected, so a GitHub repository whose owner or name
 * merely contains "gitlab" is not mistaken for a GitLab project.
 *
 * @param {string} repoUrl - Repository URL.
 * @returns {boolean} True when the hostname contains "gitlab".
 */
function isGitLabRepoUrl(repoUrl) {
  try {
    return new URL(repoUrl).hostname.includes('gitlab');
  } catch (err) {
    // Not an absolute URL: fall back to the plain substring heuristic.
    return repoUrl.includes('gitlab');
  }
}
</script>
</body>
</html>
Loading
Loading