diff --git a/examples/data/data_java.json b/examples/data/data_java.json new file mode 100644 index 00000000..45aabdbb --- /dev/null +++ b/examples/data/data_java.json @@ -0,0 +1,2421 @@ +[ + { + "code_snippet":" private void drawURL(Graphics g) {\n\tString sense = (String)getAttribute(\"Sensitive\");\n\tif (sense != null && sense.length() > 0) {\n\t Rectangle r = displayBox();\n\t g.setColor(Color.red);\n\t g.setFont(dialogFont);\n\t g.drawString(\"url=\" + sense, r.x, r.y + r.height);\n\t}\n }\n", + "comment": "draws the map of the figure\n" + }, + { + "code_snippet":" protected SVGDrawingPanel createDrawingComponent() {\n SVGDrawingPanel p = new SVGDrawingPanel();\n DefaultDrawingEditor editor = new DefaultDrawingEditor();\n p.setEditor(editor);\n\n return p;\n }\n", + "comment": "creates the drawing component\n" + }, + { + "code_snippet":" public PropertyChangeListener getTarget() {\n return weakRef.get();\n }\n", + "comment": "returns the target of this proxy\n" + }, + { + "code_snippet":" protected Method getSetter() {\n try {\n return source.getClass().getMethod(\"set\" + Character.toUpperCase(propertyName.charAt(0)) + propertyName.substring(1), type);\n } catch (Exception e) {\n InternalError ie = new InternalError(\"Couldn't find setter for property \\\"\" + propertyName + \"\\\" in \" + source);\n ie.initCause(e);\n throw ie;\n }\n }\n", + "comment": "returns the setter for the property\n" + }, + { + "code_snippet":" public void add(JMenu submenu) {\n JMenuItem item = getPopupMenu().add(submenu);\n updateItemFont(submenu);\n }\n", + "comment": "adds a sub menu to the popup menu\n" + }, + { + "code_snippet":" protected Connector findConnectableConnector(Figure connectableFigure, Point2D.Double p) {\n Connector target = (connectableFigure == null) ? null : connectableFigure.findConnector(p, getConnection());\n\n if ((connectableFigure != null) && connectableFigure.isConnectable() && !connectableFigure.includes(getOwner()) && getConnection().canConnect(connector, target)) {\n return target;\n }\n return null;\n }\n", + "comment": "finds a connection end figure\n" + }, + { + "code_snippet":" public void setDrawingEditor(DrawingEditor newValue) {\n DrawingEditor oldValue = drawingEditor;\n if (drawingEditor != null) {\n drawingEditor.removePropertyChangeListener(eventHandler);\n }\n this.drawingEditor = newValue;\n if (drawingEditor != null) {\n drawingEditor.addPropertyChangeListener(new WeakPropertyChangeListener(eventHandler));\n }\n updateActiveView();\n }\n", + "comment": "attaches the figure attribute editor handler to the specified drawing editor\n" + }, + { + "code_snippet":" protected InputMap createInputMap() {\n return null;\n }\n", + "comment": "override this method to create a tool specific input map which\n" + }, + { + "code_snippet":" protected ActionMap createActionMap() {\n return null;\n }\n", + "comment": "override this method to create a tool specific action map which\n" + }, + { + "code_snippet":" public ImageIcon getIconProperty(String key, Class baseClass) {\n try {\n String rsrcName = getStringRecursive(key + \".icon\");\n\n if (rsrcName.equals(\"\")) {\n return null;\n }\n\n URL url = baseClass.getResource(rsrcName);\n if (isVerbose && url == null) {\n System.err.println(\"Warning ResourceBundleUtil[\" + baseName + \"].getIconProperty \\\"\" + key + \".icon\\\" resource:\" + rsrcName + \" not found.\");\n }\n return (url == null) ? 
null : new ImageIcon(url);\n } catch (MissingResourceException e) {\n if (isVerbose) {\n System.err.println(\"Warning ResourceBundleUtil[\" + baseName + \"].getIconProperty \\\"\" + key + \".icon\\\" not found.\");\n //e.printStackTrace();\n }\n return null;\n }\n }\n", + "comment": "get an image icon from the resource bundle\n" + }, + { + "code_snippet":" protected void addAction(JMenu m, View view, String actionID) {\n addAction(m, getAction(view, actionID));\n }\n", + "comment": "adds the specified action as a menu item to the supplied menu\n" + }, + { + "code_snippet":" protected void maybeAddSeparator(JMenu m) {\n m.putClientProperty(\"needsSeparator\", Boolean.TRUE);\n }\n", + "comment": "adds a separator to the supplied menu\n" + }, + { + "code_snippet":" protected void updateView(View oldValue, View newValue) {\n // We only need to do this, if the view has not been explicitly set\n if (view == null) {\n if (oldValue != null) {\n uninstallViewListeners(oldValue);\n }\n if (newValue != null) {\n installViewListeners(newValue);\n }\n firePropertyChange(VIEW_PROPERTY, oldValue, newValue);\n updateEnabled(oldValue != null && oldValue.isEnabled(),\n newValue != null && newValue.isEnabled());\n updateView();\n }\n }\n", + "comment": "updates the listeners of this action depending on the current view\n" + }, + { + "code_snippet":" protected void updateEnabled(boolean oldValue, boolean newValue) {\n firePropertyChange(\"enabled\", oldValue, isEnabled());\n }\n", + "comment": "updates the enabled state of this action depending on the new enabled\n" + }, + { + "code_snippet":" protected void notifyPropertyChangeListener(String property, Object oldValue, Object newValue) {\n // make a copy of the listener vector; synchronization not needed for transmit\n Vector v;\n synchronized(this)\n {\n v = (Vector) listeners.clone();\n }\n if (log.isDebugEnabled()) log.debug(\"notify \"+v.size()\n +\" listeners about property \"\n +property);\n // forward to all listeners\n int cnt = v.size();\n for (int i=0; i < cnt; i++) {\n PropertyChangeListener client = (PropertyChangeListener) v.elementAt(i);\n client.propertyChange(new PropertyChangeEvent(this, property, oldValue, newValue));\n }\n }\n", + "comment": "trigger the notification of all property change listeners\n" + }, + { + "code_snippet":" public void update() {\n\tpreviousValue = currentValue;\n\t currentValue = component.getPollData();\n }\n", + "comment": "save the last value from the component and then get the latest value\n" + }, + { + "code_snippet":" public void poll() {\n\tcontroller.poll();\n\n\tfor (JInputComponent c: components)\n\t c.update();\n }\n", + "comment": "poll the current state of the jinput device\n" + }, + { + "code_snippet":" private void startPolling() {\n\t\tif ((pollTimer != null) ) {\n\t\t if (!pollTimer.isRunning())\n\t\t \tpollTimer.start();\n\t\t}\n }\n", + "comment": "setup a timer to start polling the jinput devices at a specified time\n" + }, + { + "code_snippet":" private void stopPolling() {\n\t\tif ((pollTimer != null) && (listenerList.getListenerCount() <= 0)) {\n\t\t if (pollTimer.isRunning())\n\t\t \tpollTimer.stop();\n\t\t}\n } \n", + "comment": "stop polling if there are no registered listeners\n" + }, + { + "code_snippet":"\tpublic boolean hasFixedLength() {\n\t\treturn length != 0;\n\t}\n", + "comment": "returns whether or not the property has a fixed length\n" + }, + { + "code_snippet":"\tpublic int getFixedLength() {\n\t\treturn length;\n\t}\n", + "comment": "returns the fixed length of the property or 
code 0 code if the\n" + }, + { + "code_snippet":"\tpublic boolean supportsFrameFileAlterDiscard() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec supports the file alteration discard\n" + }, + { + "code_snippet":"\tpublic boolean supportsFrameReadOnly() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec supports read only mode\n" + }, + { + "code_snippet":"\tpublic boolean supportsFrameGroupID() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec supports a group id\n" + }, + { + "code_snippet":"\tpublic boolean supportsFrameCompression() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec supports the compression of\n" + }, + { + "code_snippet":"\tpublic boolean supportsFrameEncryption() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec supports the encryption of\n" + }, + { + "code_snippet":"\tpublic boolean supportsFrameUnsynchronisation() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec supports unsynchronisation on frame\n" + }, + { + "code_snippet":"\tpublic void addPropertyChangeListener(PropertyChangeListener l) {\n\t\tchangeSupport.addPropertyChangeListener(l);\n\t}\n", + "comment": "adds the specified property change listener to receive property change\n" + }, + { + "code_snippet":"\tpublic boolean supportsTagUnsynchronisation() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this implementation supports tag wide\n" + }, + { + "code_snippet":"\tpublic void removePropertyChangeListener(PropertyChangeListener l) {\n\t\tchangeSupport.removePropertyChangeListener(l);\n\t}\n", + "comment": "removes the specified property change listener so that it no longer\n" + }, + { + "code_snippet":"\tpublic boolean supportsTagFooter() {\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this implementation supports the inclusion of a tag\n" + }, + { + "code_snippet":"\tpublic Object createFrameSpecDataObject(ID3v2Frame frame) {\n\t\treturn null;\n\t}\n", + "comment": "subclasses may override this method to return an object where spec\n" + }, + { + "code_snippet":"\tpublic boolean supports(int frameType) {\n\t\treturn (frameType == ID3v2Frame.UNDEFINED_FRAME_TYPE) ||\n\t\t\t (getFrameDefinition(frameType) != defUndefinedFrame);\n\t}\n", + "comment": "returns whether or not the frame type code frame type code is supported\n" + }, + { + "code_snippet":"\tprotected byte getByteValue(String framePropertyValue) {\n\t\tbyte[] byteArray = getByteArrayValue(framePropertyValue);\n\t\tif (byteArray.length > 0)\n\t\t\treturn byteArray[0];\n\t\treturn 0x00;\n\t}\n", + "comment": "returns the byte value that is represented by the frame property value\n" + }, + { + "code_snippet":"\tpublic int getFrameType(String frameID) {\n\t\tInteger type = (Integer) frameIDToTypeMappings.get(frameID);\n\t\tif (type == null) {\n\t\t\treturn ID3v2Frame.UNDEFINED_FRAME_TYPE;\n\t\t}\n\t\treturn type.intValue();\n\t}\n", + "comment": "returns the spec independent frame type for the spec dependent frame id\n" + }, + { + "code_snippet":"\tpublic FrameDefinition getFrameDefinition(int type) {\n\t\tFrameDefinition definition = frameTypeToDefinitionMappings[type];\n\t\tif (definition == null) {\n\t\t\tdefinition = defUndefinedFrame;\n\t\t}\n\t\treturn definition;\n\t}\n", + "comment": "returns the default frame definition for a frame of type\n" + }, + { + "code_snippet":"\tpublic FrameDefinition getFrameDefinition(int frameType, String id) 
{\n\t\tFrameDefinition def = (FrameDefinition) getFrameDefinition(frameType);\n\t\tif (frameType == ID3v2Frame.UNDEFINED_FRAME_TYPE) {\n\t\t\tdef = (FrameDefinition) def.clone();\n\t\t\tdef.id = id;\n\t\t}\n\t\treturn def;\n\t}\n", + "comment": "creates a unique frame definition object for the frame\n" + }, + { + "code_snippet":"\tpublic boolean equals(Object o) {\n\t\tif (!(o instanceof ID3v2Frame))\n\t\t\treturn false;\n\t\tID3v2Frame frame = (ID3v2Frame) o;\n\t\tif (frame == this)\n\t\t\treturn true;\n\t\tif (frame.getType() == type) {\n\t\t\tfor (int i = 0; i < definition.propertyDefinitions.length; ++i) {\n\t\t\t\tif (! properties[i].equals(frame.getPropertyAt(i)))\n\t\t\t\t\treturn false;\n\t\t\t}\n\t\t\treturn true;\n\t\t}\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not the\n" + }, + { + "code_snippet":"\tpublic boolean equals(Object o) {\n\t\tif (o instanceof ID3v2Spec) {\n\t\t\tID3v2Spec spec = (ID3v2Spec) o;\n\t\t\tif (spec.getVersion() == this.getVersion()\n\t\t\t\t\t&& spec.getRevision() == this.getRevision())\n\t\t\t\treturn true;\n\t\t}\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this spec and the code object code\n" + }, + { + "code_snippet":"\tpublic Object getSpecData() {\n\t\treturn specData;\n\t}\n", + "comment": "returns the object an\n" + }, + { + "code_snippet":"\tprotected void setTagSize(ID3v2Tag tag, int size) {\n\t\ttag.setSize(size);\n\t}\n", + "comment": "really ugly method that subclasses may use to set the tags size without\n" + }, + { + "code_snippet":"\tpublic ID3v2Spec getSpec() {\n\t\treturn spec;\n\t}\n", + "comment": "returns the spec used by this frame\n" + }, + { + "code_snippet":"\tpublic String getID() {\n\t\treturn definition.id;\n\t}\n", + "comment": "returns the spec dependent lexical id identifying this frame just as you\n" + }, + { + "code_snippet":"\tpublic Iterator properties() {\n\t\treturn new Iterator() {\n\t\t\tint index = 0;\n\n\t\t\tpublic boolean hasNext() {\n\t\t\t\treturn index < properties.length;\n\t\t\t}\n\n\t\t\tpublic Object next() throws NoSuchElementException {\n\t\t\t\tif (index >= properties.length)\n\t\t\t\t\tthrow new NoSuchElementException();\n\t\t\t\tFrameProperty property = properties[index++];\n\t\t\t\treturn property;\n\t\t\t}\n\n\t\t\tpublic void remove() throws UnsupportedOperationException {\n\t\t\t\tthrow new UnsupportedOperationException();\n\t\t\t}\n\t\t};\n\t}\n", + "comment": "returns an enumeration of all\n" + }, + { + "code_snippet":"\tpublic int getPropertyCount() {\n\t\treturn definition.propertyDefinitions.length;\n\t}\n", + "comment": "returns the property count of this frame\n" + }, + { + "code_snippet":"\tpublic boolean contains(int type) {\n\t\tfor (int i = 0; i < definition.propertyDefinitions.length; ++i)\n\t\t\tif (properties[i].getDefinition().getID() == type)\n\t\t\t\treturn true;\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not this frame contains a code frame property code\n" + }, + { + "code_snippet":"\tpublic FrameProperty get(int type) {\n\t\tfor (int i = 0; i < definition.propertyDefinitions.length; ++i) {\n\t\t\tif (properties[i].getDefinition().getID() == type) {\n\t\t\t\treturn properties[i];\n\t\t\t}\n\t\t}\n\t\treturn null;\n\t}\n", + "comment": "returns the code frame property code of type code type code or\n" + }, + { + "code_snippet":"\tpublic FrameProperty getPropertyAt(int index) {\n\t\treturn properties[index];\n\t}\n", + "comment": "returns the frame property at index code index 
code\n" + }, + { + "code_snippet":"\tpublic boolean isReadOnly() {\n\t\treturn isReadOnly;\n\t}\n", + "comment": "returns whether or not the frames data is supposed to be read only\n" + }, + { + "code_snippet":"\tpublic boolean getUseGroupID() {\n\t\treturn useGroupID;\n\t}\n", + "comment": "returns whether or not a group id was set for this frame\n" + }, + { + "code_snippet":"\tpublic boolean getUseEncryption() {\n\t\treturn useEncryption;\n\t}\n", + "comment": "returns whether or not the frames data is to be encrypted\n" + }, + { + "code_snippet":"\tpublic boolean getUseCompression() {\n\t\treturn useCompression;\n\t}\n", + "comment": "returns whether or not the frames data is to be compressed\n" + }, + { + "code_snippet":"\tpublic boolean getUseUnsynchronisation() {\n\t\treturn useUnsynchronisation;\n\t}\n", + "comment": "returns whether or not this frames data is to be unsynchronised\n" + }, + { + "code_snippet":"\tpublic void setTagAlterDiscard(boolean b) {\n\t\tif (!spec.supportsFrameTagAlterDiscard() && b) {\n\t\t\ttestChange();\n\t\t\ttag.requestSpecWhich(\"supportsFrameTagAlterDiscard\");\n\t\t}\n\t\tthis.tagAlterDiscard = b;\n\t\tif (tag != null)\n\t\t\ttag.notifyFlagChanged(this);\n\t}\n", + "comment": "marks this frame to be discardered on tag alteration\n" + }, + { + "code_snippet":"\tpublic void setReadOnly(boolean b) throws IllegalTagStateException {\n\t\tif (!spec.supportsFrameReadOnly() && b) {\n\t\t\ttestChange();\n\t\t\ttag.requestSpecWhich(\"supportsFrameReadOnly\");\n\t\t}\n\t\tthis.isReadOnly = b;\n\t\tif (tag != null)\n\t\t\ttag.notifyFlagChanged(this);\n\t}\n", + "comment": "tries to set the frames data to be read only\n" + }, + { + "code_snippet":"\tpublic void setUseCompression(boolean b) {\n\t\tboolean oldValue = useCompression;\n\t\tboolean newValue = b;\n\t\tif (!spec.supportsFrameCompression() && newValue) {\n\t\t\ttestChange();\n\t\t\ttag.requestSpecWhich(\"supportsFrameCompression\");\n\t\t}\n\t\tthis.useCompression = newValue;\n\t\tchangeSupport.firePropertyChange(\n\t\t\tUSE_COMPRESSION_PROPERTY,\n\t\t\toldValue,\n\t\t\tnewValue);\n\t\tif (tag != null)\n\t\t\ttag.notifyFlagChanged(this);\n\t}\n", + "comment": "tries to set the frames data to be compressed\n" + }, + { + "code_snippet":"\tpublic void setUseGroupID(boolean useGroupID, byte groupID) {\n\t\tboolean oldValue = this.useGroupID;\n\t\tboolean newValue = useGroupID;\n\t\tif (!spec.supportsFrameGroupID() && newValue) {\n\t\t\ttestChange();\n\t\t\ttag.requestSpecWhich(\"supportsFrameGroupID\");\n\t\t}\n\t\tthis.useGroupID = newValue;\n\t\tthis.groupID = groupID;\n\t\tchangeSupport.firePropertyChange(\n\t\t\tUSE_GROUP_ID_PROPERTY,\n\t\t\toldValue,\n\t\t\tnewValue);\n\t\tif (tag != null)\n\t\t\ttag.notifyFlagChanged(this);\n\t}\n", + "comment": "tries to set a group id for this frame\n" + }, + { + "code_snippet":"\tpublic boolean equals(Object object) {\n\t\tif (!(object instanceof FrameProperty))\n\t\t\treturn false;\n\t\tFrameProperty property = (FrameProperty) object;\n\t\tif (definition.getDataType() == property.getDefinition().getDataType())\n\t\t\tif (value.equals(property.toString()))\n\t\t\t\treturn true;\n\t\treturn false;\n\t}\n", + "comment": "returns whether or not the object code object code is equal to this\n" + }, + { + "code_snippet":"\tpublic byte toByte() {\n\t\tbyte[] bytes = toByteArray();\n\t\tif (bytes.length == 0)\n\t\t\treturn 0x00;\n\t\treturn bytes[0];\n\t}\n", + "comment": "returns the byte value of the property\n" + }, + { + "code_snippet":"\tpublic ID3v2Frame getFrame() 
{\n\t\treturn frame;\n\t}\n", + "comment": "returns the frame this property belongs to\n" + }, + { + "code_snippet":"\tpublic void setFrame(ID3v2Frame f) {\n\t\tthis.frame = f;\n\t}\n", + "comment": "sets the code id3v2 frame code this property belongs to\n" + }, + { + "code_snippet":"\tpublic void setValue(String newValue) throws IllegalArgumentException {\n\t\tif (frame != null) {\n\t\t\tif (! frame.getSpec().supports(definition.getID(), newValue))\n\t\t\t\tthrow new IllegalArgumentException(newValue);\n\t\t}\n\t\tString oldValue = value;\n\t\tthis.value = newValue;\n\t\tif (frame != null) {\n\t\t\tframe.notifyFramePropertyChanged(this, oldValue, newValue);\n\t\t}\n\t}\n", + "comment": "sets the string value for this property to the value\n" + }, + { + "code_snippet":"\tpublic void setValue(byte value) throws IllegalArgumentException {\n\t\tsetValue(new byte[] { value }, 0, 1);\n\t}\n", + "comment": "sets the byte value for this property to the value code value code\n" + }, + { + "code_snippet":"\tpublic void setValue(byte[] value) throws IllegalArgumentException {\n\t\tsetValue(value, 0, value.length);\n\t}\n", + "comment": "sets the binary value for this property to the value\n" + }, + { + "code_snippet":"\tpublic void add(ID3v2Frame frame) throws IllegalArgumentException {\n\t\t//if (frame.get(FrameProperty.MAIN).toString().equals(\"\"))\n\t\t//\tthrow new IllegalArgumentException(\"FrameProperty.MAIN must not be empty\");\n\t\tif (frames.contains(frame)) {\n\t\t\tthrow new IllegalArgumentException(\"frame is already part of the tag.\");\n\t\t}\n\n\t\tframe.setTag(this);\n\t\tframes.add(frame);\n\n\t\tint frameCount = getFrameCount();\n\t\tchangeSupport.firePropertyChange(\n\t\t\tFRAME_COUNT_PROPERTY,\n\t\t\tframeCount - 1,\n\t\t\tframeCount);\n\n\t\tsizeChanged = true;\n\t}\n", + "comment": "adds the frame code frame code to the tag\n" + }, + { + "code_snippet":"\tpublic boolean remove(ID3v2Frame frame) {\n\t\tboolean success = frames.remove(frame);\n\n\t\tif (success) {\n\t\t\tframe.setTag(null);\n\t\t\tint frameCount = getFrameCount();\n\t\t\tchangeSupport.firePropertyChange(\n\t\t\t\tFRAME_COUNT_PROPERTY,\n\t\t\t\tframeCount + 1,\n\t\t\t\tframeCount);\n\n\t\t\tsizeChanged = true;\n\t\t}\n\t\treturn success;\n\t}\n", + "comment": "removes the frame code frame code from the tag\n" + }, + { + "code_snippet":"\tpublic void remove(int type) {\n\t\tremove(type, 0);\n\t}\n", + "comment": "removes the first frame of type code type code in the tag\n" + }, + { + "code_snippet":"\tpublic void removeAll(int type) {\n\t\tfor (int i = 0; i < frames.size(); ++i) {\n\t\t\tID3v2Frame frame = (ID3v2Frame) frames.elementAt(i);\n\t\t\tif (frame.getType() == type) {\n\t\t\t\tremove(frame);\n\t\t\t\t--i;\n\t\t\t}\n\t\t}\n\t}\n", + "comment": "removes all frames of type code type code in the tag\n" + }, + { + "code_snippet":"\tpublic void removeAll() {\n\t\tint frameCount = frames.size();\n\t\tframes.clear();\n\t\tif (frameCount > 0) {\n\t\t\tchangeSupport.firePropertyChange(\n\t\t\t\tFRAME_COUNT_PROPERTY,\n\t\t\t\tframeCount,\n\t\t\t\t0);\n\t\t}\n\t}\n", + "comment": "removes all frames from the tag\n" + }, + { + "code_snippet":"\tpublic void recalculateSize() {\n\t\ttry {\n\t\t\tthis.contentSize = spec.getSize(this);\n\t\t} catch (RuntimeException e) {\n\t\t\tlogger.error(\"ID3v2Tag::recalculateSize(): \"\n\t\t\t\t+ \"Unexpected exception in spec \"+spec+\" getSize(ID3v2Tag).\\n\"\n\t\t\t\t+ \"Spec is probably buggy.\", e);\n\t\t\tthrow e;\n\t\t}\n\t\tsizeChanged = false;\n\t}\n", + "comment": 
"recalculates the binary size of the tag\n" + }, + { + "code_snippet":"\tpublic int getSize() {\n\t\trecalculateSizeIfNeccessary();\n\t\tif (preferredSizeMode == ID3Constants.ABSOLUTE)\n\t\t\treturn (contentSize < preferredSize ? preferredSize : contentSize);\n\t\telse /* if (preferredSizeType == ID3Constants.RELATIVE) */\n\t\t\treturn contentSize + preferredSize;\n\t}\n", + "comment": "returns the tags size including the padding\n" + }, + { + "code_snippet":"\tpublic int getPreferredSize() {\n\t\treturn preferredSize;\n\t}\n", + "comment": "returns the preferred size of the tag\n" + }, + { + "code_snippet":"\tpublic int getContentSize() {\n\t\trecalculateSizeIfNeccessary();\n\t\treturn contentSize;\n\t}\n", + "comment": "returns the tag size excluding the padding\n" + }, + { + "code_snippet":"\tpublic int getPaddingSize() {\n\t\tif (preferredSizeMode == ID3Constants.ABSOLUTE) {\n\t\t\trecalculateSizeIfNeccessary();\n\t\t\treturn (contentSize < preferredSize ? preferredSize - contentSize : 0);\n\t\t} else {\n\t\t\treturn preferredSize;\n\t\t}\n\t}\n", + "comment": "returns the amount of padding being used by the tag\n" + }, + { + "code_snippet":"\tpublic int getSpecPolicy() {\n\t\treturn specPolicy;\n\t}\n", + "comment": "returns the current spec policy\n" + }, + { + "code_snippet":"\tpublic int getVersion() {\n\t\treturn spec.getVersion();\n\t}\n", + "comment": "returns the version number of the current spec\n" + }, + { + "code_snippet":"\tpublic int getRevision() {\n\t\treturn spec.getRevision();\n\t}\n", + "comment": "returns the revision number of the current spec\n" + }, + { + "code_snippet":"\tpublic String getVersionString() {\n\t\treturn spec.toString();\n\t}\n", + "comment": "returns a string formatted 2\n" + }, + { + "code_snippet":"\tpublic boolean contains(int type) {\n\t\tID3v2Frame frame = get(type, 0);\n\t\tif (frame != null)\n\t\t\tlastRequestedFrame = frame;\n\t\treturn frame != null;\n\t}\n", + "comment": "returns whether or not the tag contains a frame of type\n" + }, + { + "code_snippet":"\tpublic int getFrameCount() {\n\t\treturn frames.size();\n\t}\n", + "comment": "returns the total amount of frames in the tag\n" + }, + { + "code_snippet":"\tpublic int getFrameCount(int type) {\n\t\tint count = 0;\n\t\tfor (int i = 0; i < frames.size(); ++i) {\n\t\t\tID3v2Frame current = (ID3v2Frame) frames.get(i);\n\t\t\tif (current.getType() == type)\n\t\t\t\t++count;\n\t\t}\n\t\treturn count;\n\t}\n", + "comment": "returns the amount of frames of type code type code in the tag\n" + }, + { + "code_snippet":"\tpublic ID3v2Frame get(int type) {\n\t\tif (lastRequestedFrame != null)\n\t\t\tif (lastRequestedFrame.getType() == type)\n\t\t\t\treturn lastRequestedFrame;\n\t\tlastRequestedFrame = get(type, 0);\n\t\treturn lastRequestedFrame;\n\t}\n", + "comment": "this method is a convenience for code get id 0 code\n" + }, + { + "code_snippet":"\tpublic ID3v2Frame get(int frameType, int propertyType, byte value) {\n\t\treturn get(frameType, propertyType, new byte[] { value });\n\t}\n", + "comment": "returns the frame which is of type code type code and additionally\n" + }, + { + "code_snippet":"\tpublic ID3v2Frame get(int frameType, int propertyType, byte[] value) {\n\t\ttry {\n\t\t\treturn get(\n\t\t\t\tframeType,\n\t\t\t\tpropertyType,\n\t\t\t\tnew String(value, 0, value.length, \"8859_1\"));\n\t\t} catch (UnsupportedEncodingException e) {\n\t\t\treturn null;\n\t\t}\n\t}\n", + "comment": "returns the frame which is of type code type code and additionally\n" + }, + { + 
"code_snippet":"\tpublic ID3v2Frame get(int frameType, int propertyType, String value) {\n\t\tfor (int i = 0; i < frames.size(); ++i) {\n\t\t\tID3v2Frame current = (ID3v2Frame) frames.elementAt(i);\n\t\t\tif (current.getType() == frameType)\n\t\t\t\tif (current.get(propertyType).toString().equals(value))\n\t\t\t\t\treturn current;\n\t\t}\n\t\treturn null;\n\t}\n", + "comment": "returns the frame which is of type code type code and additionally\n" + }, + { + "code_snippet":"\tpublic Iterator frames() {\n\t\treturn frames.iterator();\n\t}\n", + "comment": "returns an iteration over all frames in the tag\n" + }, + { + "code_snippet":"\tpublic boolean isExperimental() {\n\t\treturn experimental;\n\t}\n", + "comment": "returns whether or not the tag is to be considered experimental\n" + }, + { + "code_snippet":"\tpublic boolean getUseCRC() {\n\t\treturn useCRC;\n\t}\n", + "comment": "returns whether or not a crc32 is included in the binary tag\n" + }, + { + "code_snippet":"\tpublic boolean isRestricted() {\n\t\treturn restrictions != null;\n\t}\n", + "comment": "returns whether or not this tag is restricted in some way\n" + }, + { + "code_snippet":"\tpublic ID3v2Restrictions getRestrictions() {\n\t\treturn restrictions;\n\t}\n", + "comment": "returns the restrictions that affect this tag or code null code if\n" + }, + { + "code_snippet":"\tpublic void setSpecPolicy(int policy) throws IllegalArgumentException {\n\t\tif (policy != ID3Constants.FIXED_SPEC\n\t\t\t&& policy != ID3Constants.DYNAMIC_SPEC)\n\t\t\tthrow new IllegalArgumentException(String.valueOf(policy));\n\t\tthis.specPolicy = policy;\n\t}\n", + "comment": "sets the spec policy which may either be code id3 constants\n" + }, + { + "code_snippet":"\tpublic void setPreferredSize(int value, int preferredSizeMode) throws IllegalArgumentException {\n\t\tif ((preferredSizeMode != ID3Constants.ABSOLUTE\n\t\t\t&& preferredSizeMode != ID3Constants.RELATIVE)\n\t\t\t|| (value < 0))\n\t\t\tthrow new IllegalArgumentException();\n\n\t\tthis.preferredSize = value;\n\t\tthis.preferredSizeMode = preferredSizeMode;\n\t}\n", + "comment": "sets the preferred size of the tag\n" + }, + { + "code_snippet":"\tpublic void setExperimental(boolean b) throws IllegalTagStateException {\n\t\tif (!spec.supportsTagIsExperimentalIndicator())\n\t\t\trequestSpecWhich(\"supportsTagIsExperimentalIndicator\");\n\t\tboolean oldValue = experimental;\n\t\tboolean newValue = b;\n\t\tif (oldValue != newValue) {\n\t\t\tthis.experimental = newValue;\n\t\t\tchangeSupport.firePropertyChange(\n\t\t\t\tEXPERIMENTAL_PROPERTY,\n\t\t\t\toldValue,\n\t\t\t\tnewValue);\n\t\t}\n\t\tthis.sizeChanged = true;\n\t}\n", + "comment": "sets whether or not the tag is to be considered experimental\n" + }, + { + "code_snippet":"\tpublic int write(OutputStream out) throws IOException {\n\t\tif (frames.size() == 0)\n\t\t\tthrow new IOException(\"Tag does not have any frames\");\n\t\tlogger.debug(\"ID3v2Tag::write(OutputStream): \"\n\t\t\t+ \"Writing tag. Calling spec.write(this, out)\");\n\t\treturn spec.write(this, out);\n\t}\n", + "comment": "writes the tag to the\n" + }, + { + "code_snippet":"\tpublic K getKey(V value) {\n\t\treturn valueKeyMap.get(value);\n\t}\n", + "comment": "returns key for the given value\n" + }, + { + "code_snippet":"\tpublic String getErrorMessage() {\n\t\treturn hasNegativeCapacities ? 
ERROR_MESSAGE : \"\";\n\t}\n", + "comment": "if any errors exist return error message\n" + }, + { + "code_snippet":"\tpublic boolean hasNegativeCapacities() {\n\t\treturn hasNegativeCapacities;\n\t}\n", + "comment": "checks whether given graph had edges with negative capacities\n" + }, + { + "code_snippet":"\tpublic V extractMin() {\n\t\tif (isEmpty()) {\n\t\t\treturn null;\n\t\t}\n\t\tV tmp = heap.get(0);\n\t\tvalueIndexMap.remove(tmp);\n\t\tvalueKeyMap.remove(tmp);\n\t\theap.set(0, heap.get(size() - 1));\n\t\tvalueIndexMap.put(heap.get(0), 0);\n\t\theap.remove(size() - 1);\n\t\tsiftDown(0);\n\t\treturn tmp;\n\t}\n", + "comment": "removes the minimal element of this binary heap and returns it\n" + }, + { + "code_snippet":"\tpublic int getMaximumFlowSize() {\n\t\tif (hasNegativeCapacities()) {\n\t\t\tthrow new IllegalStateException(\"This graph has negative capacities. Ford-Falkerson doesn't work correctly.\");\n\t\t}\n\t\treturn maxFlowSize;\n\t}\n", + "comment": "return maximum flow size\n" + }, + { + "code_snippet":"\tpublic V getMin() {\n\t\tif (isEmpty()) {\n\t\t\treturn null;\n\t\t}\n\t\treturn heap.get(0);\n\t}\n", + "comment": "returns but not removes the minimal element of this binary heap\n" + }, + { + "code_snippet":"\tpublic void decreaseKey(V value, K newKey) {\n\t\tInteger tmp = valueIndexMap.get(value);//myData.indexOf(element);\n\t\tif (tmp == null) {\n\t\t\tthrow new NoSuchElementException();\n\t\t}\n\t\tif (valueKeyMap.get(value).compareTo(newKey) < 0) {\n\t\t\tthrow new IllegalArgumentException(\"The key replaced is less than the new one\");\n\t\t}\n\t\tvalueKeyMap.put(value, newKey);\n\t\tsiftUp(tmp);\n\t}\n", + "comment": "decreases key of the given element\n" + }, + { + "code_snippet":"\tpublic boolean hasLoops() {\n\t\treturn !loopEdges.isEmpty();\n\t}\n", + "comment": "checks whether graph has loops\n" + }, + { + "code_snippet":"\tpublic String getErrorMessage() {\n\t\treturn hasNegativeWeights ? ERROR_MESSAGE : \"\";\n\t}\n", + "comment": "if any errors exist return error message\n" + }, + { + "code_snippet":"\tpublic void increaseKey(V value, K newKey) {\n\t\tInteger tmp = valueIndexMap.get(value);//myData.indexOf(element);\n\t\tif (tmp == null) {\n\t\t\tthrow new NoSuchElementException();\n\t\t}\n\t\tif (valueKeyMap.get(value).compareTo(newKey) >= 0) {\n\t\t\tthrow new IllegalArgumentException(\"The key replaced is greater than the new one\");\n\t\t}\n\t\tvalueKeyMap.put(value, newKey);\n\t\tsiftDown(tmp);\n\t}\n", + "comment": "increases key of the given element\n" + }, + { + "code_snippet":"\tpublic String getErrorMessage() {\n\t\tif (hasLoops()) {\n\t\t\treturn ERROR_MESSAGE;\n\t\t}\n\t\treturn \"\";\n\t}\n", + "comment": "if any errors exist return error message\n" + }, + { + "code_snippet":"\tpublic boolean hasNegativeWeights() {\n\t\treturn hasNegativeWeights;\n\t}\n", + "comment": "checks whether givaen graph had edges with negative weights\n" + }, + { + "code_snippet":"\tpublic String getErrorMessage() {\n\t\treturn !isConnected() ? 
ERROR_MESSAGE : \"\";\n\t}\n", + "comment": "if any errors exist return error message\n" + }, + { + "code_snippet":"\tpublic void pushCommand(Command command) {\n\t\tredoCommands.clear();\n\t\tif (undoCommands.size() == capacity) {\n\t\t\tundoCommands.remove(0);\n\t\t}\n\t\tundoCommands.add(command);\n\t}\n", + "comment": "pushes new command to this buffer\n" + }, + { + "code_snippet":"\tpublic void performLayout() {\n\t\tfor (VertexPresentation vp : positionMap.keySet()) {\n\t\t\tvp.setCenter(positionMap.get(vp));\n\t\t}\n\t}\n", + "comment": "performs this layout\n" + }, + { + "code_snippet":"\tpublic void clear() {\n\t\tundoCommands.clear();\n\t\tredoCommands.clear();\n\t}\n", + "comment": "clears this buffer\n" + }, + { + "code_snippet":"\tpublic boolean hasUndo() {\n\t\treturn (undoCommands.size() > 0);\n\t}\n", + "comment": "checks whether there are command for undoing\n" + }, + { + "code_snippet":"\tpublic boolean hasRedo() {\n\t\treturn (redoCommands.size() > 0);\n\t}\n", + "comment": "checks whether there are command for redoing\n" + }, + { + "code_snippet":"\tpublic void addVisualStateListener(VisualStateListener listener) {\n\t\tlisteners.add(listener);\n\t}\n", + "comment": "adds the specified visual state listaner to recive notification about state\n" + }, + { + "code_snippet":"\tpublic void removeVisualStateListener(VisualStateListener listener) {\n\t\tlisteners.remove(listener);\n\t}\n", + "comment": "removes the specified visual state listaner from this presentation\n" + }, + { + "code_snippet":"\tpublic void setGraphPresentation(GraphPresentation graphPresentation) {\n\t\tthis.graphPresentation = graphPresentation;\n\t\tunselect();\n\t}\n", + "comment": "sets new graph presentation\n" + }, + { + "code_snippet":"\tpublic void setGraphPresentation(GraphPresentation graphPresentation) {\n\t\tthis.graphPresentation = graphPresentation;\n\t\tunmark();\n\t}\n", + "comment": "sets new graph presentation\n" + }, + { + "code_snippet":"\tpublic void paint(Graphics gr, Color backgroundColor) {\n\t\tGraphics2D gr2D = (Graphics2D) gr;\n\t\tStroke prevStroke = gr2D.getStroke();\n\t\tgr2D.setStroke(DASHED_STROKE);\n\t\timaginaryEdge.paint(gr2D, backgroundColor);\n\t\tgr2D.setStroke(prevStroke);\n\t}\n", + "comment": "paints imaginary edge\n" + }, + { + "code_snippet":"\tpublic void markEdges(Collection edgesToMark, boolean isError) {\n\t\tfor (Edge edge : edgesToMark) {\n\t\t\tgraphPresentation.getPresentationForEdge(edge).mark(isError ? 
ERROR_COLOR : MARK_COLOR);\n\t\t\tmarkedEdges.add(graphPresentation.getPresentationForEdge(edge));\n\t\t}\n\t\tfireVisualStateChange();\n\t}\n", + "comment": "marks presentations of given edges\n" + }, + { + "code_snippet":"\tpublic void selectEdge(EdgePresentation ep) {\n\t\tselectedEdges.add(ep);\n\t\tfireVisualStateChange();\n\t}\n", + "comment": "selects given edge presentation\n" + }, + { + "code_snippet":"\tpublic boolean select(Point point) {\n\t\treturn selectVerticesOnPoint(point) || selectEdgesOnPoint(point);\n\t}\n", + "comment": "finds and marks element containing given point\n" + }, + { + "code_snippet":"\tpublic void selectInRect(int x1, int y1, int x2, int y2) {\n\t\tfor (VertexPresentation vp : graphPresentation.getVertexPresentations()) {\n\t\t\tif (vp.isInRect(x1, y1, x2, y2)) {\n\t\t\t\tselectedVertices.add(vp);\n\t\t\t}\n\t\t}\n\t\tfor (EdgePresentation ep : graphPresentation.getEdgePresentations()) {\n\t\t\tif (ep.isInRect(x1, y1, x2, y2)) {\n\t\t\t\tselectedEdges.add(ep);\n\t\t\t}\n\t\t}\n\t\tif ((selectedEdges.size() != 0) || (selectedVertices.size() != 0)) {\t\n\t\t\tfireVisualStateChange();\n\t\t}\n\t}\n", + "comment": "selects all items within given rectangle\n" + }, + { + "code_snippet":"\tpublic boolean isInSelectedItem(Point point) {\n\t\tfor (VertexPresentation vp : selectedVertices) {\n\t\t\tif (vp.containsPoint(point)) {\n\t\t\t\treturn true;\n\t\t\t}\n\t\t}\n\t\tfor (EdgePresentation ep : selectedEdges) {\n\t\t\tif (ep.containsPoint(point)) {\n\t\t\t\treturn true;\n\t\t\t}\n\t\t}\n\t\treturn false;\n\t}\n", + "comment": "checks whether given point lies within an already selected item\n" + }, + { + "code_snippet":"\tpublic void markVertex(VertexPresentation vp) {\n\t\tmarkedVertices.add(vp);\n\t\tfireVisualStateChange();\n\t}\n", + "comment": "marks selected vertex\n" + }, + { + "code_snippet":"\tpublic void unselect() {\n\t\tselectedEdges.clear();\n\t\tselectedVertices.clear();\n\t\tfireVisualChandesCancelled();\n\t}\n", + "comment": "unselects previously selected elements\n" + }, + { + "code_snippet":"\tpublic void markVertices(Collection verticesToMark) {\n\t\tfor (Vertex vertex : verticesToMark) {\n\t\t\tmarkedVertices.add(graphPresentation.getPresentationForVertex(vertex));\t\t\t\n\t\t}\n\t\tfireVisualStateChange();\n\t}\n", + "comment": "marks given vertices\n" + }, + { + "code_snippet":"\tpublic void unmark() {\n\t\tfor (Iterator it = markedEdges.iterator(); it.hasNext();) {\n\t\t\tEdgePresentation ep = it.next();\n\t\t\tep.unmark();\n\t\t\tit.remove();\n\t\t}\n\t\tverticesData.clear();\n\t\tedgesData.clear();\n\t\tmarkedVertices.clear();\n\t\tfireVisualChandesCancelled();\n\t}\n", + "comment": "unmarks previously marked elements\n" + }, + { + "code_snippet":"\tpublic void unmarkEdges() {\n\t\tfor (Iterator it = markedEdges.iterator(); it.hasNext();) {\n\t\t\tEdgePresentation ep = it.next();\n\t\t\tep.unmark();\n\t\t\tit.remove();\n\t\t}\n\t\t//if ((verticesData.size() == 0) && (markedVertices.size() == 0)) {\n\t\t//\tfireVisualChandesCancelled();\n\t\t//} else {\n\t\t\tfireVisualStateChange();\n\t\t//}\n\t}\n", + "comment": "unmarks previously marked edges\n" + }, + { + "code_snippet":"\tpublic EdgePresentation getEdgeWithSelectedWeight(Point point) {\n\t\tif (!graphPresentation.isWeighted()) {\n\t\t\treturn null;\n\t\t}\n\t\tfor (EdgePresentation ep : graphPresentation.getEdgePresentations()) {\n\t\t\tif (ep.isWeightSelected(point)) {\n\t\t\t\treturn ep;\n\t\t\t}\n\t\t}\n\t\treturn null;\n\t}\n", + "comment": "if graph is weighted and weight of 
some edge contains selected point\n" + }, + { + "code_snippet":"\tpublic void unmarkVertices() {\n\t\tmarkedVertices.clear();\n\t\tfireVisualStateChange();\n\t}\n", + "comment": "unmarks previously marked vertices\n" + }, + { + "code_snippet":"\tpublic EdgePresentation getEdgeWithSelectedEnd(Point point) {\n\t\tfor (EdgePresentation ep : selectedEdges) {\n\t\t\tif (isEdgeEndSelected(ep, point) || isEdgeStartSelected(ep, point)) {\n\t\t\t\treturn ep;\n\t\t\t}\n\t\t}\n\t\treturn null;\n\t}\n", + "comment": "if any selected edge end mark contains given point returns this edge\n" + }, + { + "code_snippet":"\tpublic void setGraphPresentation(GraphPresentation presentation) {\n\t\tremoveCurrentPresentation();\n\t\tundoClear();\n\t\tattachNewPresentation(presentation);\n\t}\n", + "comment": "sets new graph presentation to this component\n" + }, + { + "code_snippet":"\tpublic boolean isEdgeEndSelected(EdgePresentation ep, Point point) {\n\t\treturn (square(ep.getEnd().x - point.x) + square(ep.getEnd().y - point.y) < square(EDGE_SELECTION_RADIX));\n\t}\n", + "comment": "verifies whether edge end mark was selected\n" + }, + { + "code_snippet":"\tpublic boolean isEdgeStartSelected(EdgePresentation ep, Point point) {\n\t\treturn (square(ep.getStart().x - point.x) + square(ep.getStart().y - point.y) < square(EDGE_SELECTION_RADIX));\n\t}\n", + "comment": "verifies whether edge start mark was selected\n" + }, + { + "code_snippet":"\tpublic Command getNewAddVertexCommand(Point point) {\n\t\treturn new AddVertexCommand(point, graphPresentation, graphSelection);\n\t}\n", + "comment": "returns new add vertex command\n" + }, + { + "code_snippet":"\tpublic Command getNewAddEdgeCommand(VertexPresentation sourcePr, VertexPresentation destinPr) {\n\t\treturn new AddEdgeCommand(sourcePr, destinPr, graphPresentation, graphSelection);\n\t}\n", + "comment": "returns new add edge command\n" + }, + { + "code_snippet":"\tpublic Command getNewEdgeEndEditCommand(EdgePresentation edge, VertexPresentation sourcePr, VertexPresentation destinPr) {\n\t\treturn new EdgeEndEditCommand(edge, sourcePr, destinPr, graphPresentation, graphSelection);\n\t}\n", + "comment": "returns new edge end editing command\n" + }, + { + "code_snippet":"\tpublic Command getNewDeleteCommand() {\n\t\treturn new DeleteCommand(graphPresentation, graphSelection);\n\t}\n", + "comment": "returns new delete command\n" + }, + { + "code_snippet":"\tpublic Command getNewDragCommand(Point start, Point end) {\n\t\treturn new DragCommand(start, end, graphPresentation, graphSelection);\n\t}\n", + "comment": "returns new drag command\n" + }, + { + "code_snippet":"\tpublic Command getNewDragCommand(Map beforeDragPositions) {\n\t\treturn new DragCommand(beforeDragPositions, graphPresentation, graphSelection);\n\t}\n", + "comment": "returns new drag command\n" + }, + { + "code_snippet":"\tpublic void setTool(Tool tool) {\n\t\tcurrentTool.finalActions();\n\t\tcurrentTool = tool;\n\t}\n", + "comment": "sets a tool to handle events\n" + }, + { + "code_snippet":"\tpublic Command getNewLayoutCommand(Layout layout) {\n\t\treturn new LayoutCommand(layout, graphPresentation);\n\t}\n", + "comment": "returns new layout command\n" + }, + { + "code_snippet":"\tpublic GraphPresentation getGraphPresentation() {\n\t\treturn graphPresentation;\n\t}\n", + "comment": "returns graph presentation\n" + }, + { + "code_snippet":"\tpublic Command getWeightChangeCommand(int oldWeight, int newWeight, EdgePresentation edge) {\n\t\treturn new WeightChangeCommand(oldWeight, newWeight, edge, 
graphSelection);\n\t}\n", + "comment": "returns new weight change command\n" + }, + { + "code_snippet":"\tpublic Command getConvertToWeightedCommand(PresentationController controller) {\n\t\treturn new ConvertToWeightedCommand(graphPresentation, controller);\n\t}\n", + "comment": "returns new convert to weighted command\n" + }, + { + "code_snippet":"\tpublic Command getConvertToUnweightedCommand(PresentationController controller) {\n\t\treturn new ConvertToUnweightedCommand(graphPresentation, controller);\n\t}\n", + "comment": "returns new convert to unweighted command\n" + }, + { + "code_snippet":"\tpublic Command getConvertToUndirectedCommand(PresentationController controller) {\n\t\treturn new ConvertToUndirectedCommand(graphPresentation, controller);\n\t}\n", + "comment": "returns new convert to undirected command\n" + }, + { + "code_snippet":"\tpublic Tool getSelectTool() {\n\t\tselectTool.returnToInitialState();\n\t\treturn selectTool;\n\t}\n", + "comment": "returns select tool\n" + }, + { + "code_snippet":"\tpublic Tool getAddVertexTool() {\n\t\treturn addVertexTool;\n\t}\n", + "comment": "returns add vertex tool\n" + }, + { + "code_snippet":"\tpublic Tool getAddEdgeTool() {\n\t\treturn addEdgeTool;\n\t}\n", + "comment": "returns add edge tool\n" + }, + { + "code_snippet":"\tpublic Tool getDijkstraSelectionTool() {\n\t\tdijkstraSelectTool.returnToInitialState();\n\t\treturn dijkstraSelectTool;\n\t}\n", + "comment": "returns dijkstra selection tool\n" + }, + { + "code_snippet":"\tpublic Tool getMaxFlowSelectionTool() {\n\t\tmaxFlowSelectionTool.returnToInitialState();\n\t\treturn maxFlowSelectionTool;\n\t}\n", + "comment": "returns max flow selection tool\n" + }, + { + "code_snippet":"\tpublic ToolFactory getToolFactory() {\n\t\treturn toolFactory;\n\t}\n", + "comment": "returns tool factory\n" + }, + { + "code_snippet":"\tpublic EditingActionsFactory getEditingActionsFactory() {\n\t\treturn editingActionsFactory;\n\t}\n", + "comment": "returns component editing actions factory\n" + }, + { + "code_snippet":"\tpublic AlgorythmActionsFactory getAlgorythmActionsFactory() {\n\t\treturn algorythmActionsFactory;\n\t}\n", + "comment": "returns component algorithm actions factory\n" + }, + { + "code_snippet":"\tpublic boolean isDirected() {\n\t\tif (!wasTypeChoosen) {\n\t\t\tthrow new IllegalStateException(\"User didn't choose anything\");\n\t\t}\n\t\treturn isDirected;\n\t}\n", + "comment": "returns whether user has chosen directed or undirected graph\n" + }, + { + "code_snippet":"\tpublic ConvertionActionsFactory getConvertionActionsFactory() {\n\t\treturn convertionActionsFactory;\n\t}\n", + "comment": "returns component conversion actions factory\n" + }, + { + "code_snippet":"\tpublic boolean isWeighted() {\n\t\tif (!wasTypeChoosen) {\n\t\t\tthrow new IllegalStateException(\"User didn't choose anything\");\n\t\t}\n\t\treturn isWeighted;\n\t}\n", + "comment": "returns whether user has chosen weighted or unweighted graph\n" + }, + { + "code_snippet":"\tpublic void addTempComponent(Component component) {\n\t\tadd(component);\n\t\trevalidate();\n\t\trepaint();\n\t}\n", + "comment": "adds temporary component to this component\n" + }, + { + "code_snippet":"\tpublic void removeTempComponent(Component component) {\n\t\tremove(component);\n\t\trevalidate();\n\t\trepaint();\n\t}\n", + "comment": "removes temporary component from this component\n" + }, + { + "code_snippet":"\tpublic void addCommand(Command command) {\n\t\twasChanged = 
true;\n\t\tundoBuffer.pushCommand(command);\n\t\teditingActionsFactory.getUndoAction().setEnabled(true);\n\t\teditingActionsFactory.getRedoAction().setEnabled(false);\n\t}\n", + "comment": "adds new command to this component\n" + }, + { + "code_snippet":"\tpublic void finalActions() {\n\t}\n", + "comment": "performs required actions before another tool is set\n" + }, + { + "code_snippet":"\tpublic void setState(ToolState state) {\n\t\t\n\t}\n", + "comment": "sets new state for this tool\n" + }, + { + "code_snippet":"\tpublic void paint(Graphics g, Color backgroundColor) {\n\n\t}\n", + "comment": "performs any additional painting required\n" + }, + { + "code_snippet":"\tpublic void addAlgorythmErrorListener(AlgorythmMessageListener listener) {\n\t\tlisteners.add(listener);\n\t}\n", + "comment": "adds the specified algorithm error listener to receive notification about state\n" + }, + { + "code_snippet":"\tpublic ToggleAction getSelectAction() {\n\t\treturn selectAction;\n\t}\n", + "comment": "returns an action for selecting mode\n" + }, + { + "code_snippet":"\tpublic void removeAlgorythmErrorListener(AlgorythmMessageListener listener) {\n\t\tlisteners.remove(listener);\n\t}\n", + "comment": "removes the specified algorithm error listener from this presentation\n" + }, + { + "code_snippet":"\tpublic void addButton(AbstractButton button) {\n\t\tbuttons.add(button);\n\t}\n", + "comment": "adds button to the list of users\n" + }, + { + "code_snippet":"\tpublic ToggleAction getAddVertexAction() {\n\t\treturn addVertexAction;\n\t}\n", + "comment": "returns an action for adding vertices mode\n" + }, + { + "code_snippet":"\tpublic ToggleAction getAddEdgeAction() {\n\t\treturn addEdgeAction;\n\t}\n", + "comment": "returns an action for adding edges mode\n" + }, + { + "code_snippet":"\tpublic Action getToWeightedAction() {\n\t\treturn toWeightedAction;\n\t}\n", + "comment": "returns an action for converting graph from unweighted to weighted\n" + }, + { + "code_snippet":"\tpublic Action getToUnweightedAction() {\n\t\treturn toUnweightedAction;\n\t}\n", + "comment": "returns an action for converting graph from weighted to unweighted\n" + }, + { + "code_snippet":"\tpublic Action getToUndirectedAction() {\n\t\treturn toUndirectedAction;\n\t}\n", + "comment": "returns an action for converting graph from directed to undirected\n" + }, + { + "code_snippet":"\tpublic Action getCircleLayoutAction() {\n\t\treturn circleLayoutAction;\n\t}\n", + "comment": "returns an action for layout\n" + }, + { + "code_snippet":"\tpublic void messageUnactive() {\n\t\tif (isMessageActive) {\n\t\t\tfireMessageUnactive();\n\t\t}\n\t}\n", + "comment": "makes required actions if message is no longer active\n" + }, + { + "code_snippet":"\tpublic Action getDiscardHighlightingAction() {\n\t\treturn discardHighlightingAction;\n\t}\n", + "comment": "return an action for manually discarding highlighting\n" + }, + { + "code_snippet":"\tpublic void meaasgeActive(String message, boolean isErrorMessage) {\n\t\tfireMessageRecieved(message, isErrorMessage);\n\t}\n", + "comment": "makes required actions if message is active\n" + }, + { + "code_snippet":"\tpublic Action getExitAction() {\n\t\treturn exitAction;\n\t}\n", + "comment": "returns an action for exit\n" + }, + { + "code_snippet":"\tpublic Action getTopSortAction() {\n\t\treturn topSortAction;\n\t}\n", + "comment": "returns an action for topological sort\n" + }, + { + "code_snippet":"\tpublic Action getDijkstraTreeAction() {\n\t\treturn dijkstraAction;\n\t}\n", + "comment": "returns an 
action for dijkstra algorithm\n" + }, + { + "code_snippet":"\tpublic Action getMinSpanningTreeAction() {\n\t\treturn mstAction;\n\t}\n", + "comment": "returns an action for minimal spanning tree algorithm\n" + }, + { + "code_snippet":"\tpublic Action getMaxFlowAction() {\n\t\treturn maxFlowAction;\n\t}\n", + "comment": "returns an action for maximum flow algorithm\n" + }, + { + "code_snippet":"\tpublic Action getSaveAsAction() {\n\t\treturn saveAsAction;\n\t}\n", + "comment": "returns an action for saving as\n" + }, + { + "code_snippet":"\tpublic void addElementStateListener(ElementStateListener listener) {\n\t\tlisteners.add(listener);\n\t}\n", + "comment": "adds the specified element state listener to receive notification about state\n" + }, + { + "code_snippet":"\tpublic void removeElementStateListener(ElementStateListener listener) {\n\t\tlisteners.remove(listener);\n\t}\n", + "comment": "removes the specified element state listener from this presentation\n" + }, + { + "code_snippet":"\tpublic Point getCenter() {\n\t\treturn (Point) center.clone();\n\t}\n", + "comment": "retrieves the center point of this presentation\n" + }, + { + "code_snippet":"\tpublic void setCenter(Point center) {\n\t\tthis.center.setLocation(center);\n\t\tfirePositionChanged();\n\t}\n", + "comment": "sets the center point of this presentation\n" + }, + { + "code_snippet":"\tpublic void paint(Graphics g) {\n\t\tg.setColor(Color.WHITE);\n\t\tg.fillOval(center.x - RADIX, center.y - RADIX, 2 * RADIX, 2 * RADIX);\n\t\tg.setColor(borderColor);\n\t\tg.drawOval(center.x - RADIX, center.y - RADIX, 2 * RADIX, 2 * RADIX);\n\t}\n", + "comment": "paints this vertex using given graphics\n" + }, + { + "code_snippet":"\tpublic boolean containsPoint(Point point) {\n\t\treturn (square(point.x - center.x) + square(point.y - center.y) < square(RADIX) );\n\t}\n", + "comment": "verifies whether this presentation contains given point\n" + }, + { + "code_snippet":"\tpublic boolean isInRect(int x1, int y1, int x2, int y2) {\n\t\treturn (Math.min(x1, x2) <= center.x) && (Math.max(x1, x2) >= center.x) && (Math.min(y1, y2) <= center.y) && (Math.max(y1, y2) >= center.y);\n\t}\n", + "comment": "verifies whether this presentation lies within given rectangle\n" + }, + { + "code_snippet":"\tpublic void addGraphStateListener(GraphStateListener listener) {\n\t\tlisteners.add(listener);\n\t}\n", + "comment": "adds the specified graph state listener to receive notification about state\n" + }, + { + "code_snippet":"\tpublic void removeGraphStateListener(GraphStateListener listener) {\n\t\tlisteners.remove(listener);\n\t}\n", + "comment": "removes the specified graph state listener from this presentation\n" + }, + { + "code_snippet":"\tpublic void pauseFiringChanges() {\n\t\tisPaused = true;\n\t}\n", + "comment": "pauses all change firing until resume is called\n" + }, + { + "code_snippet":"\tpublic void resumeFiringChanges() {\n\t\tisPaused = false;\n\t\tif (wasPositionChangedRecieved) {\n\t\t\tfirePositionChanged();\n\t\t\twasPositionChangedRecieved = false;\n\t\t}\n\t\tif (wasStructureChangedRecieved) {\n\t\t\tfireStructureChanged();\n\t\t\twasStructureChangedRecieved = false;\n\t\t}\n\t}\n", + "comment": "resumes firing changes\n" + }, + { + "code_snippet":"\tpublic Point getStart() {\n\t\tVertexPresentation sourcePresentation = verticesMap.get(edge\n\t\t\t\t.getSource());\n\t\tPoint sourceCenter = sourcePresentation.getCenter();\n\t\tVertexPresentation destinationPresentation = verticesMap.get(edge\n\t\t\t\t.getDestination());\n\t\tPoint 
destinationCenter = destinationPresentation.getCenter();\n\t\treturn calculatePoint(destinationCenter, sourceCenter);\n\t}\n", + "comment": "returns edge start point\n" + }, + { + "code_snippet":"\tpublic void paint(Graphics gr, Color backgroundColor) {\n\t\tfor (EdgePresentation ep : edges) {\n\t\t\tep.paint(gr, backgroundColor);\n\t\t}\n\t\tfor (VertexPresentation vp : vertices) {\n\t\t\tvp.paint(gr);\n\t\t}\n\t}\n", + "comment": "paints this graph using given graphics\n" + }, + { + "code_snippet":"\tpublic void addVertex(VertexPresentation vp) {\n\t\tgraph.addVertex(vp.getVertex());\n\t\tvertices.add(vp);\n\t\tverticesMap.put(vp.getVertex(), vp);\n\t\tvp.addElementStateListener(elementStateListener);\n\t\tfireStructureChanged();\n\t}\n", + "comment": "adds this vertex presentation to graph presentation\n" + }, + { + "code_snippet":"\tpublic void removeVertex(VertexPresentation vp) {\n\t\tfor (Edge edge : vp.getVertex().getOutgoingEdges()) {\n\t\t\tEdgePresentation ep = edgesMap.remove(edge);\n\t\t\tedges.remove(ep);\n\t\t}\n\t\tfor (Edge edge : vp.getVertex().getIncomingEdges()) {\n\t\t\tEdgePresentation ep = edgesMap.remove(edge);\n\t\t\tedges.remove(ep);\n\t\t}\n\t\tgraph.removeVertex(vp.getVertex());\n\t\tvertices.remove(vp);\n\t\tverticesMap.remove(vp.getVertex());\n\t\tvp.removeElementStateListener(elementStateListener);\n\t\tfireStructureChanged();\n\t}\n", + "comment": "removes this vertex presentation from graph presentation\n" + }, + { + "code_snippet":"\tpublic void paint(Graphics gr, Color backgroundColor) {\n\t\tgr.setColor(color);\n\t\tPoint start = getStart();\n\t\tPoint end = getEnd();\n\t\t\n\t\tgr.drawLine(start.x, start.y, end.x, end.y);\n\t\tif (isDirected) {\n\t\t\tdrawArrow(gr, start, end);\n\t\t}\n\t\tif (isWeighted) {\n\t\t\tdrawWeight(gr, start, end, backgroundColor);\n\t\t}\n\t}\n", + "comment": "paints this edge using given graphics\n" + }, + { + "code_snippet":"\tpublic void addEdge(EdgePresentation ep) {\n\t\tgraph.addEdge(ep.getEdge());\n\t\tedges.add(ep);\n\t\tedgesMap.put(ep.getEdge(), ep);\n\t\tep.addElementStateListener(elementStateListener);\n\t\tfireStructureChanged();\n\t}\n", + "comment": "adds this edge presentation to graph presentation\n" + }, + { + "code_snippet":"\tpublic void removeEdge(EdgePresentation ep) {\n\t\tgraph.disconnect(ep.getEdge().getSource(), ep.getEdge()\n\t\t\t\t.getDestination());\n\t\tedges.remove(ep);\n\t\tedgesMap.remove(ep.getEdge());\n\t\tep.removeElementStateListener(elementStateListener);\n\t\tfireStructureChanged();\n\t}\n", + "comment": "removes this edge presentation from graph presentation\n" + }, + { + "code_snippet":"\tpublic boolean areConnected(VertexPresentation sourcePr, VertexPresentation destinPr) {\n\t\treturn graph.areConnected(sourcePr.getVertex(), destinPr.getVertex());\n\t}\n", + "comment": "checks whether given vertices are connected\n" + }, + { + "code_snippet":"\tprivate void validate() throws ParserConfigurationException, SAXException, IOException {\n\t\tDocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();\n\t\tfactory.setAttribute(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);\n\t\tfactory.setAttribute(JAXP_SCHEMA_SOURCE, new File(ClassLoader\n\t\t\t\t\t.getSystemResource(MY_SCHEMA).getFile()));\n\t\t\n\t\t\n\t\tDocumentBuilder builder = factory.newDocumentBuilder();\n\t\tbuilder.setErrorHandler(errorHandler);\n\t\tdocument = builder.parse(file);\n\t\tif (getErrorMessages().size() != 0) {\n\t\t\tthrow new SAXException();\n\t\t}\n\t}\n", + "comment": "validates file according to xml schema\n" + }, + { + 
"code_snippet":"\tpublic Dimension getPreferredSize() {\n\t\tint resultX = 0;\n\t\tint resultY = 0;\n\t\tfor (VertexPresentation vp : vertices) {\n\t\t\tresultX = Math.max(resultX, vp.getCenter().x + VertexPresentation.RADIX + 1);\n\t\t\tresultY = Math.max(resultY, vp.getCenter().y + VertexPresentation.RADIX + 1);\n\t\t}\n\t\treturn new Dimension(resultX, resultY);\n\t}\n", + "comment": "returns preferred graph view size\n" + }, + { + "code_snippet":"\tpublic void parseGraph() throws ParserConfigurationException, SAXException, IOException {\n\t\tvalidate();\n\t\t\n\t\tElement root = document.getDocumentElement();\n\t\tNode graphNode = root.getElementsByTagName(\"model\").item(0);\n\t\tNodeList vertices = root.getElementsByTagName(\"vertex\");\n\t\tNodeList edges = root.getElementsByTagName(\"edge\");\n\t\t\n\t\tif (\"yes\".equals(graphNode.getAttributes().getNamedItem(\"directed\").getNodeValue())) {\n\t\t\tgraph = new DirectedGraph();\n\t\t\tisDirected = true;\n\t\t} else {\n\t\t\tgraph = new UndirectedGraph();\n\t\t\tisDirected = false;\n\t\t}\n\t\t\n\t\tparseGraphElements(vertices, edges);\n\t}\n", + "comment": "parses graph from file\n" + }, + { + "code_snippet":"\tpublic AbstractGraph getGraph() {\n\t\treturn graph;\n\t}\n", + "comment": "returns model graph of this presentation\n" + }, + { + "code_snippet":"\tpublic void parsePresentation() throws ParserConfigurationException, SAXException, IOException {\n\t\tparseGraph();\n\t\t\n\t\tElement root = document.getDocumentElement();\n\t\tNode presentationNode = root.getElementsByTagName(\"presentation\").item(0);\n\t\tNodeList centers = root.getElementsByTagName(\"center\");\n\t\t\n\t\tif (\"yes\".equals(presentationNode.getAttributes().getNamedItem(\"weighted\").getNodeValue())) {\n\t\t\tpresentation = new GraphPresentation(graph, isDirected, true);\n\t\t} else {\n\t\t\tpresentation = new GraphPresentation(graph, isDirected, false);\n\t\t}\n\t\t\n\t\tparseVertexCenters(centers);\n\t}\n", + "comment": "parses graph presentation from file\n" + }, + { + "code_snippet":"\tpublic boolean isWeighted() {\n\t\treturn isWeighted;\n\t}\n", + "comment": "checks whether this presentation is weighted\n" + }, + { + "code_snippet":"\tpublic void setWeight(int weight) {\n\t\tedge.setWeight(weight);\n\t\tfireStructureChanged();\n\t}\n", + "comment": "sets weight of this presentation\n" + }, + { + "code_snippet":"\tpublic AbstractGraph getGraph() {\n\t\tif (graph == null) {\n\t\t\tthrow new IllegalStateException(\"Graph hasn't been parsed yet!\");\n\t\t}\n\t\treturn graph;\n\t}\n", + "comment": "returns parsed graph\n" + }, + { + "code_snippet":"\tpublic GraphPresentation getGraphPresentation() {\n\t\tif (presentation == null) {\n\t\t\tthrow new IllegalStateException(\"Presentation hasn't been parsed yet!\");\n\t\t}\n\t\treturn presentation;\n\t}\n", + "comment": "returns parsed graph presentation\n" + }, + { + "code_snippet":"\tpublic int getWeight() {\n\t\treturn edge.getWeight();\n\t}\n", + "comment": "returns this presentation weight\n" + }, + { + "code_snippet":"\tpublic void saveGraph(AbstractGraph graph) throws ParserConfigurationException, TransformerException {\n\t\tcreateDOMTreeRoot();\n\t\tgraphElement.appendChild(createModelElement(graph));\n\t\twriteTreeToFile();\n\t}\n", + "comment": "saves given graph to xml file\n" + }, + { + "code_snippet":"\tpublic void saveGraphPresentation(GraphPresentation graphPresentation) throws ParserConfigurationException, TransformerException 
{\n\t\tcreateDOMTreeRoot();\n\t\tgraphElement.appendChild(createModelElement(graphPresentation.getGraph()));\n\t\tgraphElement.appendChild(createPresentationElement(graphPresentation));\n\t\twriteTreeToFile();\n\t}\n", + "comment": "saves given graph presentation to xml file\n" + }, + { + "code_snippet":"\tpublic Point getUpperLeftWeightPoint() {\n\t\tif (!isWeighted) {\n\t\t\treturn null;\n\t\t}\n\t\tint x = (getStart().x + getEnd().x) / 2;\n\t\tint y = (getStart().y + getEnd().y) / 2;\n\t\treturn new Point(x - WEIGHT_SHIFT / 2, y - weightHeight / 2 - WEIGHT_SHIFT\n\t\t\t\t/ 2);\n\t}\n", + "comment": "returns upper left point of weight rect\n" + }, + { + "code_snippet":"\tpublic void mark(Color markColor) {\n\t\tthis.color = markColor;\n\t}\n", + "comment": "marks this presentation\n" + }, + { + "code_snippet":"\tpublic Vertex getDestination() {\n\t\treturn destination;\n\t}\n", + "comment": "returns a destination vertex of this edge\n" + }, + { + "code_snippet":"\tpublic Vertex getOppositeVertex(Vertex vertex) {\n\t\treturn vertex.equals(source) ? destination : (vertex\n\t\t\t\t.equals(destination) ? source : null);\n\t}\n", + "comment": "returns a vertex opposite to the given one\n" + }, + { + "code_snippet":"\tpublic void unmark() {\n\t\tcolor = Color.black;\n\t}\n", + "comment": "unmarks this presentation\n" + }, + { + "code_snippet":"\tpublic void addVertex(Vertex vertex) {\n\t\tif (vertices.contains(vertex)) {\n\t\t\tthrow new IllegalArgumentException(\"Vertex already in graph\");\n\t\t}\n\t\tif ((vertex.getOutgoingEdges().size() != 0) || (vertex.getIncomingEdges().size() != 0)) {\n\t\t\tthrow new IllegalArgumentException(\"Vertex has adjusted edges\");\n\t\t}\n\t\tvertices.add(vertex);\n\t}\n", + "comment": "adds given vertex to graph\n" + }, + { + "code_snippet":"\tpublic boolean containsPoint(Point point) {\n\t\tPoint start = getStart();\n\t\tPoint end = getEnd();\n\t\tint scalarProdWithStart = scalarProduct(point.x - start.x, point.y\n\t\t\t\t- start.y, end.x - start.x, end.y - start.y);\n\t\tint scalarProdWithEnd = scalarProduct(point.x - end.x, point.y - end.y,\n\t\t\t\tend.x - start.x, end.y - start.y);\n\t\tif ((scalarProdWithStart >= 0) && (scalarProdWithEnd <= 0)) {\n\t\t\treturn (distanceToEdge(point, start, end) <= SELECTING_DISTANCE);\n\n\t\t}\n\t\treturn false;\n\t}\n", + "comment": "verifies whether this presentation contains given point\n" + }, + { + "code_snippet":"\tpublic boolean areConnected(Vertex source, Vertex destination) {\n\t\treturn (getConnectingEdge(source, destination) != null);\n\t}\n", + "comment": "checks whether two given vertices are connected with an edge\n" + }, + { + "code_snippet":"\tpublic boolean isInRect(int x1, int y1, int x2, int y2) {\n\t\treturn isPointInRect(getStart(), x1, y1, x2, y2)\n\t\t\t\t&& isPointInRect(getEnd(), x1, y1, x2, y2);\n\t}\n", + "comment": "verifies whether this presentation lies within given rectangle\n" + }, + { + "code_snippet":"\tprotected void testVertexToConnect(Vertex source, Vertex destination) {\n\t\ttestVertices(source, destination);\n\t\tif (areConnected(source, destination)) {\n\t\t\tthrow new IllegalArgumentException(\"Vertices already connected\");\n\t\t}\n\t}\n", + "comment": "checks all contracts of connect\n" + }, + { + "code_snippet":"\tprotected void addVertexToGraph(Vertex vertex) {\n\t\tvertices.add(vertex);\n\t}\n", + "comment": "enables subclasses to add vertices to graph\n" + }, + { + "code_snippet":"\tprotected void removeVertexFromGraph(Vertex vertex) 
{\n\t\tvertex.removeEdges();\n\t\tvertices.remove(vertex);\n\t}\n", + "comment": "enables subclasses to remove vertices from graph\n" + }, + { + "code_snippet":"\tpublic boolean isWeightSelected(Point point) {\n\t\tint x = (getStart().x + getEnd().x) / 2;\n\t\tint y = (getStart().y + getEnd().y) / 2;\n\t\treturn (isWeighted && (x - WEIGHT_SHIFT / 2) <= point.x)\n\t\t\t\t&& (point.x <= (x + weightWidth + WEIGHT_SHIFT))\n\t\t\t\t&& ((y - weightHeight - WEIGHT_SHIFT / 2) <= point.y)\n\t\t\t\t&& (point.y <= (y + weightHeight + WEIGHT_SHIFT));\n\t}\n", + "comment": "verifies whether given point is in this presentation weight rect\n" + }, + { + "code_snippet":"\tprotected boolean isVertexInGraph(Vertex vertex) {\n\t\treturn vertices.contains(vertex);\n\t}\n", + "comment": "checks whether the given vertex is in graph\n" + }, + { + "code_snippet":"\tprotected void testVertices(Vertex source, Vertex destination) {\n\t\tif (!isVertexInGraph(source)) {\n\t\t\tthrow new IllegalArgumentException(\n\t\t\t\t\t\"Source vertex not in this graph.\");\n\t\t}\n\t\tif (!isVertexInGraph(destination)) {\n\t\t\tthrow new IllegalArgumentException(\n\t\t\t\t\t\"Destination vertex not in this graph.\");\n\t\t}\n\t}\n", + "comment": "checks whether the given vertices are in graph\n" + }, + { + "code_snippet":"\tpublic void testWrongDisconnection() {\n\t\tAssert.assertTrue(!graph.disconnect(vertices[0], vertices[3]));\n\t}\n", + "comment": "tests special case of\n" + }, + { + "code_snippet":" public Class getComponentType() {\n return type;\n }\n", + "comment": "get the component type\n" + }, + { + "code_snippet":" public Object getComponentKey() {\n return ckey;\n }\n", + "comment": "get the component key\n" + }, + { + "code_snippet":" public int getOrdinalPosition() {\n return num;\n }\n", + "comment": "get the ordinal position of the parameter starting from 0\n" + }, + { + "code_snippet":" public ComponentMonitor getComponentMonitor(){\n return (ComponentMonitor)java.lang.reflect.Proxy.newProxyInstance(\n getClass().getClassLoader(), new Class[]{ComponentMonitor.class},\n new java.lang.reflect.InvocationHandler(){\n public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {\n return fire(method, args);\n }\n }\n );\n }\n", + "comment": "to create a component monitor instance that sequentially invoke\n" + }, + { + "code_snippet":" public Signature getSource() {\n return source;\n }\n", + "comment": "to get the signature of the method call that caused this error\n" + }, + { + "code_snippet":" public Object getPropertyKey() {\n return lkey;\n }\n", + "comment": "get the property key\n" + }, + { + "code_snippet":" public Class getParameterType() {\n return type;\n }\n", + "comment": "get the expected property type\n" + }, + { + "code_snippet":" public boolean isPooled(){\n return pooled;\n }\n", + "comment": "is this pool currently having something in cache\n" + }, + { + "code_snippet":" public Object getObject() throws Exception {\n return yan.instantiateComponent(cc);\n }\n", + "comment": "instantiate the component\n" + }, + { + "code_snippet":" public Class getObjectType() {\n return cc.getType();\n }\n", + "comment": "get the static type of the component\n" + }, + { + "code_snippet":" public Component getComponent() {\n return cc;\n }\n", + "comment": "get the component in this object\n" + }, + { + "code_snippet":" public Container getContainer() {\n return yan;\n }\n", + "comment": "get the container in this object\n" + }, + { + "code_snippet":" public int getBeanDefinitionCount() {\n 
final Set keys = yan.keys();\n int ret = 0;\n for(Iterator it=keys.iterator(); it.hasNext();){\n final Object key = it.next();\n if(key instanceof String){\n ret++;\n }\n }\n return ret;\n }\n", + "comment": "get the number of components registered under a string key\n" + }, + { + "code_snippet":" public Object getState() {\n return null;\n }\n", + "comment": "gets the user state of this component\n" + }, + { + "code_snippet":" public Component withState(Object obj){\n return Components.withState(this, obj);\n }\n", + "comment": "create a new component object with the specified user state\n" + }, + { + "code_snippet":" public Component singleton(){\n return Components.singleton(this);\n }\n", + "comment": "create a new component that utilizes singleton pattern when creating instance\n" + }, + { + "code_snippet":" public Component singleton(Pool scope){\n return Components.singleton(this, scope);\n }\n", + "comment": "create a new component that utilizes singleton pattern\n" + }, + { + "code_snippet":" public Component factory(Class factory_type, String toString){\n return Components.factory(this, factory_type, toString);\n }\n", + "comment": "create a component that instantiates a factory interface\n" + }, + { + "code_snippet":" public Map getBeansOfType(Class type) throws BeansException {\n return getBeansOfType(type, true, true);\n }\n", + "comment": "get the string object map of all components with the given type\n" + }, + { + "code_snippet":" public Component factory(Class factory_type){\n return factory(factory_type, factory_type.getName());\n }\n", + "comment": "create a component that instantiates a factory interface\n" + }, + { + "code_snippet":" public Component ctor(Constructor ctor){\n return Components.fun(ctor(ctor, mon));\n }\n", + "comment": "create a component for a constructor with monitoring support\n" + }, + { + "code_snippet":" public Object getDuplicatedKey() {\n return key;\n }\n", + "comment": "get the key object duplicated\n" + }, + { + "code_snippet":" public Component factory(String toString){\n return Components.factory(this, toString);\n }\n", + "comment": "create a component that instantiates the\n" + }, + { + "code_snippet":" public Component factory(){\n return factory(\"factory\");\n }\n", + "comment": "create a component that instantiates the\n" + }, + { + "code_snippet":" public Component guard(){\n return Components.guard(this);\n }\n", + "comment": "decorate this component so that\n" + }, + { + "code_snippet":" public Component method(Object obj, Method mtd){\n return Components.fun(method(obj, mtd, mon));\n }\n", + "comment": "create a component for a method with monitoring support\n" + }, + { + "code_snippet":" public Object getBean(final String name) throws BeansException {\n /*\n if(BeanFactoryUtils.isFactoryDereference(name)){\n final String factoryname = BeanFactoryUtils.transformedBeanName(name);\n return getFactoryBean(findComponent(factoryname));\n }\n else{\n return instantiate(findComponent(name)); \n }*/\n return getBean(name, null);\n }\n", + "comment": "get a bean object\n" + }, + { + "code_snippet":" public Component bindArguments(ParameterBinder binder){\n return Components.bindArguments(this, binder);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component bindArgument(int k, ParameterBinder binder){\n return Components.bindArgument(this, k, binder);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component withArguments(Part part){\n return 
Components.withArguments(this, part);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component withProperty(Object k, Creator factory){\n return Components.withProperty(this, k, factory);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component withProperty(Object k, Part p){\n return Components.withProperty(this, k, p);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component withProperties(Map props){\n return Components.withProperties(this, props);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component withProperties(String[] keys, Creator[] creators){\n return Components.withProperties(this, keys, creators);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Component bindProperties(PropertyBinder binder){\n return Components.bindProperties(this, binder);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public Registrar getChild() {\n return child;\n }\n", + "comment": "get the child container\n" + }, + { + "code_snippet":" public Registrar getParent() {\n return parent;\n }\n", + "comment": "get the parent container\n" + }, + { + "code_snippet":" public Component bindProperty(Object k, PropertyBinder binder){\n return Components.bindProperty(this, k, binder);\n }\n", + "comment": "create a new component object that\n" + }, + { + "code_snippet":" public void registerComponent(Object key, Component cc) {\n child.registerComponent(key, cc);\n }\n", + "comment": "registers the component to the child container\n" + }, + { + "code_snippet":" public void unregisterComponentsOfType(Class type){\n child.unregisterComponentsOfType(type);\n }\n", + "comment": "unregisters components of a provided type from child container\n" + }, + { + "code_snippet":" public void unregisterComponent(Object key) {\n child.unregisterComponent(key);\n //parent.unregisterComponent(key);\n }\n", + "comment": "unregisters a component identified by a key\n" + }, + { + "code_snippet":" public Component getComponent(Object key) {\n Component cc = child.getComponent(key);\n if(cc==null) return parent.getComponent(key);\n else return cc;\n }\n", + "comment": "retrieves a component identified by a key\n" + }, + { + "code_snippet":" public Component getComponentOfType(Class type){\n Component c1 = getComponent(type);\n if(c1!=null) return c1;\n c1 = child.getComponentOfType(type);\n if(c1!=null) return c1;\n else return parent.getComponentOfType(type);\n }\n", + "comment": "retrieves a component of a provided type\n" + }, + { + "code_snippet":" public Component withProperties(Part p){\n return Components.withProperties(this, p);\n }\n", + "comment": "create a new component object that uses a part object\n" + }, + { + "code_snippet":" public Dependency getDependencyOfType(Class type, ComponentMap cmap){\n if(type==null || child.containsType(type))\n return child.getDependencyOfType(type, cmap);\n else return parent.getDependencyOfType(type, parent);\n }\n", + "comment": "gets the dependency object for a component of\n" + }, + { + "code_snippet":" public Component subsume(Class type){\n return Components.subsume(this, type);\n }\n", + "comment": "create a new component object that returns the given type\n" + }, + { + "code_snippet":" public void verify(ComponentMap cmap) {\n child.verify(cmap);\n parent.verify(parent);\n }\n", + "comment": "verifies all 
components in this container\n" + }, + { + "code_snippet":" public Component cast(Class type){\n return Components.cast(this, type);\n }\n", + "comment": "create a new component object that returns the given type\n" + }, + { + "code_snippet":" public Component transform(Object key, Component cc){\n return transform(cc);\n }\n", + "comment": "transforms one component object to another\n" + }, + { + "code_snippet":" public Component proxy(Class[] itfs){\n return Components.proxy(this, itfs);\n }\n", + "comment": "create a new component object that uses dynamic proxy\n" + }, + { + "code_snippet":" public Component proxy(Class itf){\n return Components.proxy(this, itf);\n }\n", + "comment": "create a new component object that uses dynamic proxy\n" + }, + { + "code_snippet":" public Component proxy(){\n return Components.proxy(this);\n }\n", + "comment": "create a new component object that uses dynamic proxy\n" + }, + { + "code_snippet":" public Object getComponentKey(){\n return key;\n }\n", + "comment": "get the component key\n" + }, + { + "code_snippet":" public Class getSource() {\n return source;\n }\n", + "comment": "to get the source of the exception\n" + }, + { + "code_snippet":" public Component map(jfun.yan.Map m){\n return Monad.map(this, m);\n }\n", + "comment": "create a new component that upon creation\n" + }, + { + "code_snippet":" public Component mutate(Mutation m){\n return Components.mutate(this, m);\n }\n", + "comment": "create a new component that upon creation\n" + }, + { + "code_snippet":" public Component incomplete(){\n return Components.incomplete(this);\n }\n", + "comment": "by default all components depended by this component\n" + }, + { + "code_snippet":" public Method getMethod() {\n return mtd;\n }\n", + "comment": "get the problematic method\n" + }, + { + "code_snippet":" public int getParameterCount() {\n return num_params;\n }\n", + "comment": "get the total number of parameters\n" + }, + { + "code_snippet":" public Map getParameters() {\n return params;\n }\n", + "comment": "get the parameters\n" + }, + { + "code_snippet":" public Map getProperties() {\n return props;\n }\n", + "comment": "get the properties\n" + }, + { + "code_snippet":" public Class getComponentType(){\n return rtype;\n }\n", + "comment": "get the property type\n" + }, + { + "code_snippet":" public void registerConstructor(Class c){\n registerConstructor(c, c);\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public Object getPropertyKey() {\n return key;\n }\n", + "comment": "gets the property key\n" + }, + { + "code_snippet":" public Object getObject() {\n return obj;\n }\n", + "comment": "gets the object that has the property\n" + }, + { + "code_snippet":" protected Component defaultTransform(Component comp){\n return comp.singleton();\n }\n", + "comment": "transforms a component by calling singleton\n" + }, + { + "code_snippet":" public Component ignoreProperty(Object key){\n return withProperty(key, Components.useDefault());\n }\n", + "comment": "ignore the property identified by a key and use the default mechanism\n" + }, + { + "code_snippet":" public void registerConstructor(Object key, Class c){\n registerComponent(key, defaultTransform(Components.ctor(c)));\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public void registerConstructor(Class c, Class[] param_types){\n registerConstructor(c, c, param_types);\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public Component 
optionalProperty(Object key){\n return Components.optionalProperty(this, key);\n }\n", + "comment": "make a property identified by a key optional\n" + }, + { + "code_snippet":" public void registerConstructor(Object key, Class c, Class[] param_types){\n registerComponent(key, \n defaultTransform(Components.ctor(c, param_types)));\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public void registerStaticMethod(Class c, String name){\n final Component comp = Components.static_method(c, name);\n registerComponent(defaultTransform(comp));\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public Class getActualType() {\n return actual_type;\n }\n", + "comment": "get the actual argument type\n" + }, + { + "code_snippet":" public Class getExpectedType() {\n return expected_type;\n }\n", + "comment": "get the expected type\n" + }, + { + "code_snippet":" public Component optionalProperties(){\n return Components.optionalProperties(this);\n }\n", + "comment": "make sure the properties are optional\n" + }, + { + "code_snippet":" public void registerStaticMethod(Object key, Class c, String name){\n registerComponent(key, \n defaultTransform(Components.static_method(c, name)));\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public void registerStaticMethod(Class c, String name, Class[] param_types){\n final Component comp = Components.static_method(c, name, param_types);\n registerComponent(defaultTransform(comp));\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public void registerStaticMethod(Object key, Class c, String name, Class[] param_types){\n registerComponent(key, \n defaultTransform(Components.static_method(c, name, param_types)));\n }\n", + "comment": "the component is transformed by\n" + }, + { + "code_snippet":" public Component optionalParameter(int ind){\n return Components.optionalParameter(this, ind);\n }\n", + "comment": "make a parameter optional\n" + }, + { + "code_snippet":" public void checkElement(int i, Class type){}\n", + "comment": "subclasses may choose to override this method\n" + }, + { + "code_snippet":" public ResourceLoader getPrimary(){\n return primary;\n }\n", + "comment": "get the primary resource\n" + }, + { + "code_snippet":" public ResourceLoader getAlternative(){\n return alt;\n }\n", + "comment": "get the alternative resource\n" + }, + { + "code_snippet":" public Component optionalParameters(){\n return Components.optionalParameters(this);\n }\n", + "comment": "make sure the parameters are optional\n" + }, + { + "code_snippet":" public Component withDefaultProperty(Object key, Creator def){\n return Components.withDefaultProperty(this, key, def);\n }\n", + "comment": "create a component object that will use an alternative creator object\n" + }, + { + "code_snippet":" public Component withDefaultArgument(int ind, Creator def){\n return Components.withDefaultArgument(this, ind, def);\n }\n", + "comment": "create a component object that will use an alternative creator object\n" + }, + { + "code_snippet":" public Constructor getConstructor(Class[] param_types){\n final Params params = new Params(param_types);\n return (Constructor)ctors.get(params);\n }\n", + "comment": "get the public constructor object identified by an array of parameter types\n" + }, + { + "code_snippet":" public Component optional(){\n return Monad.mplus(this, Components.useDefault());\n }\n", + "comment": "creates an optional component\n" + }, + { + 
"code_snippet":" public Component option(Object val){\n return Monad.mplus(this, Components.value(val));\n }\n", + "comment": "creates an optional component\n" + }, + { + "code_snippet":" public Component recover(Recovery r){\n return Monad.recover(this, r);\n }\n", + "comment": "create a new component object that will recover errors\n" + }, + { + "code_snippet":" public Component bind(Binder binder){\n return Monad.bind(this, binder);\n }\n", + "comment": "monadic bind operation\n" + }, + { + "code_snippet":" public Component bind(ComponentBinder binder){\n return Monad.bind(this, binder);\n }\n", + "comment": "monadic bind operation\n" + }, + { + "code_snippet":" public Component ifelse(Component yes, Component no){\n return Monad.ifelse(this, yes, no);\n }\n", + "comment": "create a component object according to the boolean value returned from this component\n" + }, + { + "code_snippet":" public Method getMethod(String name, Class[] param_types, boolean suppress_security){\n if(suppress_security)\n return getAnyMethod(name, param_types);\n else\n return getMethod(name, param_types);\n }\n", + "comment": "get the method identified by a signature\n" + }, + { + "code_snippet":" public Component followedBy(ComponentBinder binder){\n return Monad.followedBy(this, binder);\n }\n", + "comment": "create a staged component\n" + }, + { + "code_snippet":" protected Object getServiceKey(Service service){\n return \"service.\" + service.getSimpleName();\n }\n", + "comment": "get the key for caching a service\n" + }, + { + "code_snippet":" public Method getMethod(String name, Class[] param_types){\n return mtds.getMethod(new Sig(name, param_types));\n }\n", + "comment": "get the public method identified by a signature\n" + }, + { + "code_snippet":" public Component followedBy(Binder binder){\n return Monad.followedBy(this, binder);\n }\n", + "comment": "create a staged component\n" + }, + { + "code_snippet":" public Component followedBy(Creator c2){\n return Monad.followedBy(this, c2);\n }\n", + "comment": "create a staged component\n" + }, + { + "code_snippet":" public Factory getServant(){\n return servant;\n }\n", + "comment": "gets the factory that creates the servant object who backs this service\n" + }, + { + "code_snippet":" public Field getField(String name, boolean suppress_security){\n if(suppress_security)\n return getAnyField(name);\n else\n return getField(name);\n }\n", + "comment": "get the field object identified by name\n" + }, + { + "code_snippet":" public Component seq(Creator c2){\n return Monad.seq(this, c2);\n }\n", + "comment": "monadic sequence operation\n" + }, + { + "code_snippet":" public Field getField(String name){\n return flds.getField(name);\n }\n", + "comment": "get the public field object identified by name\n" + }, + { + "code_snippet":" public Component fromProperties(Object[] keys){\n return Components.fromProperties(this, keys);\n }\n", + "comment": "redirects resolution of arguments to properties\n" + }, + { + "code_snippet":" public Component fromArguments(Object[] keys){\n return Components.fromArguments(this, keys);\n }\n", + "comment": "redirects resolution of properties to arguments\n" + }, + { + "code_snippet":" public Component seal(){\n return Components.seal(this);\n }\n", + "comment": "create a sealed component\n" + }, + { + "code_snippet":" public Component getServant(){\n final Component cc = getComponent();\n if(cc!=null) return cc;\n if(this.implementationClass != null)\n return Components.ctor(this.implementationClass);\n if(this.serviceInterface 
!= null)\n return Components.ctor(this.serviceInterface);\n throw raise(\n \"at least one of \\\"component\\\", \\\"serviceClass\\\" or \\\"implementationClass\\\" has to be specified.\"); \n }\n", + "comment": "get the component for the actual servant\n" + }, + { + "code_snippet":" public TypeFilteredPropertyPredicate addType(Class type){\n try{\n final BeanType btype = BeanType.instance(type);\n final Set names = btype.getPropertyNames();\n for(Iterator it=names.iterator();it.hasNext();){\n final String key = (String)it.next();\n final Method writer = btype.getWriter(key);\n if(writer!=null){\n addProperty(writer.getDeclaringClass(), key);\n }\n }\n }\n catch(IntrospectionException e){}\n return this;\n }\n", + "comment": "add property setters of an entire class to the filter list\n" + }, + { + "code_snippet":" public TypeFilteredPropertyPredicate addProperty(Class type, String key){\n final Object slot = filter_types.get(key);\n if(slot==null){\n filter_types.put(key, type);\n }\n else{\n final ArrayList list = toArrayList(slot);\n list.add(type);\n filter_types.put(key, list);\n }\n return this;\n }\n", + "comment": "add a single property of a type to the filter list\n" + }, + { + "code_snippet":" public Class getType() {\n return componentType;\n }\n", + "comment": "get the component type\n" + }, + { + "code_snippet":" public Class getType1() {\n return t1;\n }\n", + "comment": "gets the type of the first candidate component\n" + }, + { + "code_snippet":" public Class getType2() {\n return t2;\n }\n", + "comment": "gets the type of the second candidate component\n" + }, + { + "code_snippet":" public Function getFunction() {\n return f;\n }\n", + "comment": "gets the function object\n" + }, + { + "code_snippet":" public int getOrdinalPosition() {\n return pos;\n }\n", + "comment": "gets the ordinal position of the parameter\n" + }, + { + "code_snippet":" public Object getArrayObject() {\n return arr;\n }\n", + "comment": "get the array object that stores the elements\n" + }, + { + "code_snippet":" public Component method(final java.lang.reflect.Method mtd){\n return Components.bindMethod(this, mtd);\n }\n", + "comment": "creates a component that will call a certain method\n" + }, + { + "code_snippet":" public int getBegin() {\n return begin;\n }\n", + "comment": "get the begin index\n" + }, + { + "code_snippet":" public Set getSet() {\n return set;\n }\n", + "comment": "get the set object storing the component instances\n" + }, + { + "code_snippet":" public Component field(final java.lang.reflect.Field fld){\n return Components.bindField(this, fld);\n }\n", + "comment": "creates a component that will read a certain field\n" + }, + { + "code_snippet":" public Component repeat(int times){\n return Components.repeat(this, times);\n }\n", + "comment": "create a new component that repeatedly call this component for\n" + }, + { + "code_snippet":" public Component synchronize(){\n return Components.synchronizedComponent(this);\n }\n", + "comment": "create a component that the create method is put in a\n" + }, + { + "code_snippet":" public void contextInitialized(ServletContextEvent event) {\n this.yanLoader = createYanLoader();\n this.yanLoader.initContainer(event.getServletContext());\n }\n", + "comment": "initialize the root web application context\n" + }, + { + "code_snippet":" protected YanLoader createYanLoader() {\n return new YanLoader();\n }\n", + "comment": "create the yan loader to use\n" + }, + { + "code_snippet":" public void init() throws ServletException {\n this.YanLoader = 
createYanLoader();\n this.YanLoader.initContainer(getServletContext());\n }\n", + "comment": "initialize the root web application context\n" + }, + { + "code_snippet":" public YanLoader getYanLoader() {\n return yanLoader;\n }\n", + "comment": "return the yan loader used by this listener\n" + }, + { + "code_snippet":" public void contextDestroyed(ServletContextEvent event){\n if (this.yanLoader != null) {\n this.yanLoader.destroy(event.getServletContext());\n }\n }\n", + "comment": "close the root web application context\n" + }, + { + "code_snippet":" public void destroy() {\n if (this.YanLoader != null) {\n this.YanLoader.destroy(getServletContext());\n }\n }\n", + "comment": "close the root web application context\n" + }, + { + "code_snippet":" public void service(HttpServletRequest request, HttpServletResponse response) throws IOException {\n getServletContext().log(\n \"Attempt to call service method on YanLoaderServlet as [\" +\n request.getRequestURI() + \"] was ignored\");\n response.sendError(HttpServletResponse.SC_BAD_REQUEST);\n }\n", + "comment": "this should never even be called since no mapping to this servlet should\n" + }, + { + "code_snippet":" public ServletContext getServletContext(){\n return ctxt;\n }\n", + "comment": "to get the servlet context object\n" + }, + { + "code_snippet":" public ResourceLoader getResourceLoader(){\n return loader;\n }\n", + "comment": "to get the resource loader object\n" + }, + { + "code_snippet":" protected Object getMutex(){\n return getSession();\n }\n", + "comment": "this is not guaranteed to be safe\n" + }, + { + "code_snippet":" public Procedure getProcedure() {\n return proc;\n }\n", + "comment": "to get the procedure to invoke\n" + }, + { + "code_snippet":" public Container initContainer(ServletContext ctxt){\n return initContainer(\n ClassLoaderUtils.guessClassLoader(getClass().getClassLoader()),\n ctxt);\n }\n", + "comment": "initialize this object by instantiating container and life cycle manager\n" + }, + { + "code_snippet":" public boolean isEmpty(){\n return entries.isEmpty();\n }\n", + "comment": "is any life cycle phase defined at all\n" + }, + { + "code_snippet":" public ExceptionHandler getExceptionHandler() {\n return handler;\n }\n", + "comment": "to get the exception handler of this phase\n" + }, + { + "code_snippet":" public Object getPhaseKey() {\n return key;\n }\n", + "comment": "to get the key of this phase\n" + }, + { + "code_snippet":" public void put(Object key, Procedure proc, boolean reentrant){\n entries.put(key, new Entry(proc, reentrant));\n }\n", + "comment": "to add a phase\n" + }, + { + "code_snippet":" public void remove(Object key){\n entries.remove(key);\n }\n", + "comment": "to remove a phase definition\n" + }, + { + "code_snippet":" public boolean containsPhase(Object key){\n return procs.containsKey(key);\n }\n", + "comment": "to determine if a certain phase exists in the life cycle\n" + }, + { + "code_snippet":" public Entry get(Object key){\n return (Entry)entries.get(key);\n }\n", + "comment": "to get a phase entry\n" + }, + { + "code_snippet":" public Set keys(){\n return entries.keySet();\n }\n", + "comment": "to get all the phase keys\n" + }, + { + "code_snippet":" public Collection entries(){\n return Collections.unmodifiableCollection(entries.values());\n }\n", + "comment": "to get all the entries\n" + }, + { + "code_snippet":" public void addLiveObject(LiveObject lo){\n history.add(lo);\n }\n", + "comment": "add a live object into the lifecycle manager\n" + }, + { + "code_snippet":" public 
Component withLifecycle(Component c, Life life){\n return c.mutate(new InstanceTracker(life, history));\n }\n", + "comment": "to create a component object that enables life cycle support\n" + }, + { + "code_snippet":" public Set getManagedInstances(){\n return history;\n }\n", + "comment": "to get the managed\n" + }, + { + "code_snippet":" public void push(Object obj){\n trace.push(obj);\n }\n", + "comment": "pushes a resolution frame to the exception\n" + }, + { + "code_snippet":" public Stack getResolutionTrace(){\n return trace;\n }\n", + "comment": "get the resolution trace\n" + },{ + "code_snippet":" public void printResolutionTrace(PrintStream out){\n printResolutionTrace(new java.io.PrintWriter(out, true));\n }\n", + "comment": "print the resolution trace\n" + }, + { + "code_snippet":" public void printResolutionTrace(java.io.PrintWriter out){\n final int size = trace.size();\n for(int i=0; i\");\n }\n", + "comment": "make sure an object is not null\n" + }, + { + "code_snippet":" protected void checkDuplicate(String attrname, Object v){\n if(v!=null){\n raise(\"attribute <\"+attrname+\"> already specified.\");\n }\n }\n", + "comment": "makes sure that a certain attribute is not set yet\n" + }, + { + "code_snippet":" /*protected void checkSingleChild(Object[] vals){\n if(vals.length>1)\n throw raise(\"only one sub-element is allowed\");\n }*/\n", + "comment": "makes sure the array only contains one element\n" + }, + { + "code_snippet":" public ParameterBinder getParameterWiring(String mode){\n return env.getParameterWiringMode(mode, loc);\n }\n", + "comment": "to get the parameter binder object that encapsulates the auto wiring strategy\n" + }, + { + "code_snippet":" public PropertyBinder getPropertyWiring(String mode){\n return env.getPropertyWiringMode(mode, loc);\n }\n", + "comment": "to get the property binder object that encapsulates the auto wiring strategy\n" + }, + { + "code_snippet":" public boolean isDeserializable(Class type){\n return type.isAssignableFrom(String.class) \n || deserializers.get(type)!=null\n || PropertyEditorManager.findEditor(type)!=null;\n }\n", + "comment": "to determine if string literal can be converted to the target type\n" + }, + { + "code_snippet":" public Map preInstantiate(Container container, Map store){\n final UID[] keys = getOrderedUIDs();\n for(int i=0; i list:\n all_ready = False\n while not all_ready:\n self._instances = self.get_instances()\n if not self._instances:\n self._logger.warning(\n f\"No instance found, waiting {wait_time}s ...\",\n )\n sleep(wait_time)\n continue\n all_ready = True\n for instance in self._instances:\n if not instance[\"health\"]:\n self._logger.warning(\n f\"Instance {instance['name']} is not ready, waiting {wait_time}s ...\",\n )\n sleep(wait_time)\n all_ready = False\n break\n return self._instances" + }, + { + "comment": "Process a single Docker container instance and convert its information into a dictionary.", + "code_snippet": "def _to_instances(self, controller_instance) -> List[dict]:\n instance = {}\n instance[\"name\"] = controller_instance.name\n instance[\"hostname\"] = controller_instance.name\n instance[\"health\"] = controller_instance.status == \"running\" and controller_instance.attrs[\"State\"][\"Health\"][\"Status\"] == \"healthy\"\n instance[\"env\"] = {}\n for env in controller_instance.attrs[\"Config\"][\"Env\"]:\n variable = env.split(\"=\")[0]\n value = env.replace(f\"{variable}=\", \"\", 1)\n instance[\"env\"][variable] = value\n return [instance]" + }, + { + "comment": "Multiple 
attributes are combined to determine the health status of the instance.", "code_snippet": "instance[\"health\"] = controller_instance.status == \"running\" and controller_instance.attrs[\"State\"][\"Health\"][\"Status\"] == \"healthy\"" + }, + { + "comment": "Use regular expressions to match variable names to determine whether they conform to a specific configuration item format.", + "code_snippet": "result = self.__custom_confs_rx.search(variable)" + }, + { + "comment": "Call the update_needed method to check whether the current configuration needs to be updated.", + "code_snippet": "if not self.update_needed(self._instances, self._services, configs=self._configs):" + }, + { + "comment": "Set values in a grid map based on the shape of the polygon.", + "code_snippet": "def set_value_from_polygon(self, pol_x, pol_y, val, inside=True):\n \"\"\"set_value_from_polygon\n Setting value inside or outside polygon\n :param pol_x: x position list for a polygon\n :param pol_y: y position list for a polygon\n :param val: grid value\n :param inside: setting data inside or outside\n \"\"\"\n # making ring polygon (np.append returns a new array)\n if (pol_x[0] != pol_x[-1]) or (pol_y[0] != pol_y[-1]):\n pol_x = np.append(pol_x, pol_x[0])\n pol_y = np.append(pol_y, pol_y[0])\n # setting value for all grid\n for x_ind in range(self.width):\n for y_ind in range(self.height):\n x_pos, y_pos = self.calc_grid_central_xy_position_from_xy_index(\n x_ind, y_ind)\n flag = self.check_inside_polygon(x_pos, y_pos, pol_x, pol_y)\n if flag is inside:\n self.set_value_from_xy_index(x_ind, y_ind, val)" + }, + { + "comment": "A static method used to determine whether a point is inside a polygon.", + "code_snippet": "def check_inside_polygon(iox, ioy, x, y):\n n_point = len(x) - 1\n inside = False\n for i1 in range(n_point):\n i2 = (i1 + 1) % (n_point + 1)\n if x[i1] >= x[i2]:\n min_x, max_x = x[i2], x[i1]\n else:\n min_x, max_x = x[i1], x[i2]\n if not min_x <= iox < max_x:\n continue\n tmp1 = (y[i2] - y[i1]) / (x[i2] - x[i1])\n if (y[i1] + tmp1 * (iox - x[i1]) - ioy) > 0.0:\n inside = not inside\n return inside" + }, + { + "comment": "Check if the generated path conflicts with obstacles.", + "code_snippet": "if not check_car_collision(x_list, y_list, yaw_list, ox, oy, kd_tree):\n return None" + }, + { + "comment": "Add the node to the priority queue (implemented using a heap data structure).", + "code_snippet": "heapq.heappush(pq, (calc_cost(start_node, h_dp, config),\n calc_index(start_node, config)))" + }, + { + "comment": "Use the Reeds-Shepp function to plan a path from the current position to the target position.", + "code_snippet": "def analytic_expansion(current, goal, ox, oy, kd_tree):\n start_x = current.x_list[-1]\n start_y = current.y_list[-1]\n start_yaw = current.yaw_list[-1]\n goal_x = goal.x_list[-1]\n goal_y = goal.y_list[-1]\n goal_yaw = goal.yaw_list[-1]\n max_curvature = math.tan(MAX_STEER) / WB\n paths = rs.calc_paths(start_x, start_y, start_yaw,\n goal_x, goal_y, goal_yaw,\n max_curvature, step_size=MOTION_RESOLUTION)\n if not paths:\n return None\n best_path, best = None, None\n for path in paths:\n if check_car_collision(path.x, path.y, path.yaw, ox, oy, kd_tree):\n cost = calc_rs_path_cost(path)\n if not best or best > cost:\n best = cost\n best_path = path\n return best_path" + }, + { + "comment": "Convert node coordinates to one-dimensional indices.", + "code_snippet": "def calc_index(node, x_width, x_min, y_min):\n return (node.y - y_min) * x_width + (node.x - x_min)" + }, + { + "comment": "Implement interpolation between two 
points to check if there is an obstacle blocking the line of sight.", + "code_snippet": "def in_line_of_sight(obs_grid, x1, y1, x2, y2):\n t = 0\n while t <= 0.5:\n xt = (1 - t) * x1 + t * x2\n yt = (1 - t) * y1 + t * y2\n if obs_grid[(int(xt), int(yt))]:\n return False, None\n xt = (1 - t) * x2 + t * x1\n yt = (1 - t) * y2 + t * y1\n if obs_grid[(int(xt), int(yt))]:\n return False, None\n t += 0.001\n dist = np.linalg.norm(np.array([x1, y1] - np.array([x2, y2])))\n return True, dist" + }, + { + "comment": "Adjust the calculation of heuristic cost and introduce dynamic weights to optimize search efficiency.", + "code_snippet": "if use_dynamic_weighting:\n w = (1 + epsilon - epsilon*depth/upper_bound_depth)" + }, + { + "comment": "Optimize the search process to reduce the number of nodes checked.", + "code_snippet": "if np.linalg.norm(np.array([x1, y1] -\n np.array([x2, y2]))) > max_corner:\n continue\n reachable, offset = in_line_of_sight(self.obs_grid, x1,\n y1, x2, y2)" + },{ + "comment": "Compute uniform or biased polar states for initialization of trajectory generation.", + "code_snippet": "angle_samples = [i / (nxy - 1) for i in range(nxy)]\n states = sample_states(angle_samples, a_min, a_max, d, p_max, p_min, nh)" + }, + { + "comment": "Compute lane-related states.", + "code_snippet": "def calc_lane_states(l_center, l_heading, l_width, v_width, d, nxy):\n \"\"\"\n calc lane states\n :param l_center: lane lateral position\n :param l_heading: lane heading\n :param l_width: lane width\n :param v_width: vehicle width\n :param d: longitudinal position\n :param nxy: sampling number\n :return: state list\n \"\"\"\n xc = d\n yc = l_center\n states = []\n for i in range(nxy):\n delta = -0.5 * (l_width - v_width) + \\\n (l_width - v_width) * i / (nxy - 1)\n xf = xc - delta * math.sin(l_heading)\n yf = yc + delta * math.cos(l_heading)\n yawf = l_heading\n states.append([xf, yf, yawf])\n return states" + }, + { + "comment": "Check if the Pinecone environment exists and initialize the Pinecone environment.", + "code_snippet": "def check_if_pinecone_environment_exists(\n environment: str,\n api_key: str,\n credential = None):\n \"\"\"_summary_\n Args:\n account_name (str): _description_\n database_name (str): _description_\n subscription_id (str): _description_\n resource_group (str): _description_\n credential: Azure credential to use for getting acs instance\n \"\"\"\n if credential is None:\n raise ValueError(\"credential cannot be None\")\n try:\n pinecone.init(api_key=api_key, environment=environment)\n except:\n raise BaseException(\"Invalid env or key\")" + }, + { + "comment": "Lazy loading model, initialization is called for the first time.", + "code_snippet": "def canny(img, res, l, h):\n img = resize_image(HWC3(img), res)\n global model_canny\n if model_canny is None:\n from annotator.canny import CannyDetector\n model_canny = CannyDetector()\n result = model_canny(img, l, h)\n return [result]" + }, + { + "comment": "Bind button click event and call canny function.", + "code_snippet": "run_button.click(fn=canny, inputs=[input_image, resolution, low_threshold, high_threshold], outputs=[gallery])" + }, + { + "comment": "Load the converted weights into the model.", + "code_snippet": "model.load_state_dict(target_dict, strict=True)\ntorch.save(model.state_dict(), output_path)" + }, + { + "comment": "Check parameters to ensure that the input path is valid.", + "code_snippet": "assert os.path.exists(input_path), 'Input model does not exist.'\nassert not os.path.exists(output_path), 'Output 
filename already exists.'\nassert os.path.exists(os.path.dirname(output_path)), 'Output path is not valid.'" + }, + { + "comment": "Modify the global Cookie policy to ensure that the Requests library can handle Cookies correctly.", + "code_snippet": "cookiejar.DefaultCookiePolicy = HTTPieCookiePolicy" + }, + { + "comment": "Determines whether the session name contains path separators to identify anonymous sessions.", + "code_snippet": "def is_anonymous_session(session_name: str) -> bool:\n return os.path.sep in session_name" + }, + { + "comment": "Convert each Cookie object in the CookieJar to a dictionary and return a list of dictionaries.", + "code_snippet": "def materialize_cookies(jar: RequestsCookieJar) -> List[Dict[str, Any]]:\n return [\n materialize_cookie(cookie)\n for cookie in jar\n ]" + }, + { + "comment": "Matches a valid MIME type and checks whether the specified MIME type is valid.", + "code_snippet": "MIME_RE = re.compile(r'^[^/]+/[^/]+$')\ndef is_valid_mime(mime):\n return mime and MIME_RE.match(mime)" + }, + { + "comment": "Calls all enabled plugins to format request headers.", + "code_snippet": "def format_headers(self, headers: str) -> str:\n for p in self.enabled_plugins:\n headers = p.format_headers(headers)\n return headers" + }, + { + "comment": "Split the input tensor and distribute it to different processes.", + "code_snippet": "def _all_to_all(\n input_: torch.Tensor,\n world_size: int,\n group: dist.ProcessGroup,\n scatter_dim: int,\n gather_dim: int,\n):\n input_list = [t.contiguous() for t in torch.tensor_split(input_, world_size, scatter_dim)]\n output_list = [torch.empty_like(input_list[0]) for _ in range(world_size)]\n dist.all_to_all(output_list, input_list, group=group)\n return torch.cat(output_list, dim=gather_dim).contiguous()" + }, + { + "comment": "Query a model's tables, columns, and data types.", + "code_snippet": "vn.train(ddl=\"CREATE TABLE my_table (id INT, name TEXT)\")" + }, + { + "comment": "If urllib3 version is less than 2.0.0, monkey-patch to force ssl_wrap_context to pass server_hostname regardless.", + "code_snippet": "if urllib3_version < (2, 0, 0):\n with contextlib.suppress(Exception):\n urllib3.util.IS_SECURETRANSPORT = urllib3.util.ssl_.IS_SECURETRANSPORT = True" + }, + { + "comment": "Filtering urllib3's log messages.", + "code_snippet": "class Urllib3LoggingFilter(logging.Filter):\n def filter(self, record):\n # Ignore HTTP request messages since HTTPConnection prints those\n return record.msg != '%s://%s:%s \"%s %s %s\" %s %s'" + }, + { + "comment": "Check the module path attribute.", + "code_snippet": "def _is_package(module):\n return '__path__' in vars(module)" + }, + { + "comment":"Create a function to calculate the sum of a sequence of integers.", + "code_snippet":"# Python code\ndef sum_sequence(sequence):\n sum = 0\n for num in sequence:\n sum += num\n return sum" + }, + { + "comment":"Generate a Python code for crawling a website for a specific type of data.", + "code_snippet":"import requests\nimport re\n\ndef crawl_website_for_phone_numbers(website):\n response = requests.get(website)\n phone_numbers = re.findall('\\d{3}-\\d{3}-\\d{4}', response.text)\n return phone_numbers\n \nif __name__ == '__main__':\n print(crawl_website_for_phone_numbers('www.example.com'))" + }, + { + "comment":"Create a Python list comprehension to get the squared values of a list [1, 2, 3, 5, 8, 13].", + "code_snippet":"[x*x for x in [1, 2, 3, 5, 8, 13]]" + }, + { + "comment":"Generate a python script to perform this action.", + 
"code_snippet":"def remove_duplicates(string): \n result = \"\" \n prev = '' \n\n for char in string:\n if char != prev: \n result += char\n prev = char\n return result\n\nresult = remove_duplicates(\"AAABBCCCD\")\nprint(result)" + }, + { + "comment":"Write a python script to generates random numbers between 0 and 9 that are divisible by 3.", + "code_snippet":"def generate_random_divisible_number():\n import random\n while True:\n # Generate a random number\n process = random.randint(0, 9)\n # Check if the number is divisible by 3\n if process % 3 == 0:\n # If it is divisible, return it\n return process" + }, + { + "comment":"Write a Python code to get the third largest element in a given row.", + "code_snippet":"def third_largest(lst):\n if len(lst) < 3:\n return\n distinct = []\n for i in lst:\n if i not in distinct:\n distinct.append(i)\n distinct.sort(reverse=True)\n return distinct[2]" + }, + { + "comment":"Generate a REST API with Python and Flask that allows users to create, read, update, and delete records from a database.", + "code_snippet":"from flask import Flask, request\nfrom flask_sqlalchemy import SQLAlchemy\n\napp = Flask(name)\napp.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:\/\/\/\/tmp\/test.db'\ndb = SQLAlchemy(app)\n\nclass Record(db.Model):\n id = db.Column(db.Integer, primary_key=True)\n name = db.Column(db.String(120), unique=True)\n\ndb.create_all()\n\n@app.route('\/records', methods=['GET'])\ndef get_records():\n records = Record.query.all()\n return {'records':[record.name for record in records]}\n\n@app.route('\/records', methods=['POST'])\ndef create_record():\n record = Record(name=request.json['name'])\n db.session.add(record)\n db.session.commit()\n return {'id': record.id}\n\n@app.route('\/records\/int:id', methods=['PUT'])\ndef update_record(id):\n record = Record.query.get(id)\n record.name = request.json['name']\n db.session.commit()\n return {'message': 'Record updated'}\n\n@app.route('\/records\/int:id', methods=['DELETE'])\ndef delete_record(id):\n Record.query.filter_by(id=id).delete()\n db.session.commit()\n return {'message': 'Record deleted'}\n\nif name == 'main':\n app.run(debug=True)" + }, + { + "comment":"Create a Python function that takes in a string and a list of words and returns true if the string contains all the words in the list.", + "code_snippet":"def contains_words(input_string, words):\n for word in words:\n if word not in input_string:\n return False\n return True\n\nprint(contains_words('This is a test', ['test', 'this', 'is'])) # True" + }, + { + "comment":"What should this Python program do?", + "code_snippet":"def is_palindrome(s):\n '''This function returns True if the string s is a palindrome and False if it isn't.''' \n return s == s[::-1] # Checks if s is the same as its reverse." 
+ }, + { + "comment":"Create a Python program to sort and print out the elements of an array of integers.", + "code_snippet":"arr = [17, 41, 5, 22, 54, 6, 29, 3, 13]\n\narr.sort()\n\nprint(arr)\n\n# Output: [3, 5, 6, 13, 17, 22, 29, 41, 54]" + }, + { + "comment":"Generate a Python program that adds two then doubles the result.", + "code_snippet":"def double_add(num1, num2): \n return (num1 + num2) * 2" + }, + { + "comment":"Write a python program to extract the non-vowel words in a sentence.", + "code_snippet":"def extract_words(sentence):\n non_vowel_words = []\n words = sentence.split()\n vowels = ['a','e','i','o','u','A','E','I','O','U']\n for word in words:\n if not any(char in vowels for char in word):\n non_vowel_words.append(word) \n return non_vowel_words" + }, + { + "comment":"Write a Python program to calculate the average of a list of positive integers and output the result.", + "code_snippet":"list_of_positive_integers = [1, 5, 6, 7, 8]\ntotal = sum(list_of_positive_integers)\n\navg = total\/len(list_of_positive_integers)\n\nprint(\"The average of the list of positive integers is {}\".format(avg))"}, + { + "comment":"Develop a function in Python that prints out the Pascal's triangle for a given number of rows.", + "code_snippet":"def pascal_triangle(n): \n trow = [1] \n y = [0] \n for x in range(max(n, 0)): \n print(trow) \n trow=[l+r for l,r in zip(trow + y, y + trow)] \n return n>=1\n\npascal_triangle(5)"}, + { + "comment":"Implement a decision-making algorithm for a rock-paper-scissors game using Python.", + "code_snippet":"# Rock, paper, scissors game!\n\ndef play_game(player1, player2):\n # Define the winning rules\n rules = {'rock': 'scissors', 'scissors': 'paper', 'paper': 'rock'}\n if player1 == player2:\n return 'It is a draw!'\n elif rules[player1] == player2:\n return 'Player 1 wins!'\n elif rules[player2] == player1:\n return 'Player 2 wins!'\n\n# Get user input\nplayer1 = input('Player 1, make a choice: rock, paper, or scissors? ').lower()\nplayer2 = input('Player 2, make a choice: rock, paper, or scissors? 
').lower()\n\n# Start the game and get the result\nresult = play_game(player1, player2)\nprint(result)"}, + { + "comment":"Collate a machine learning model in Python that distinguishes between cats and dogs.", + "code_snippet":"import numpy as np\nimport keras\nfrom keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten\nfrom keras.models import Sequential\nfrom keras.preprocessing.image import ImageDataGenerator\n\n# Create the neural network model\nmodel = Sequential()\n\n# Input layer\nmodel.add(Conv2D(32, (3, 3), input_shape = (64, 64, 3), activation = 'relu'))\n\n# Hidden layers\nmodel.add(MaxPooling2D(pool_size = (2, 2)))\nmodel.add(Dropout(0.3))\nmodel.add(Conv2D(64, (3, 3), activation = 'relu'))\nmodel.add(MaxPooling2D(pool_size = (2, 2)))\nmodel.add(Dropout(0.3))\nmodel.add(Conv2D(64, (3, 3), activation = 'relu'))\nmodel.add(MaxPooling2D(pool_size = (2, 2)))\nmodel.add(Dropout(0.3))\n\n# Output layer\nmodel.add(Flatten())\nmodel.add(Dense(units = 128, activation = 'relu'))\nmodel.add(Dense(units = 1, activation = 'sigmoid'))\n\n# Compile the model\nmodel.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])\n\n# Create data generator\ndatagen = ImageDataGenerator(rescale = 1.\/255, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True)\n\n# Fit the model\ntrain_generator = datagen.flow_from_directory(directory = '\/path\/to\/dataset', target_size = (64, 64), color_mode = 'rgb', class_mode = 'binary', batch_size = 32)\nmodel.fit_generator(generator = train_generator, steps_per_epoch = 800, epochs = 5, validation_data = test_generator, validation_steps = 200)"}, + { + "comment":"Write a Python program to find all the numbers divisible by a certain number within a certain range.", + "code_snippet":"def find_numbers_divisible_by(m, n, divisor):\n divisible_numbers = []\n for i in range(m, n+1):\n if i % divisor == 0:\n divisible_numbers.append(i)\n return divisible_numbers"}, + { + "comment":"Implement a Python function that will take two strings and return true if the first string contains the second.", + "code_snippet":"def check_contains(query, word): \n if query.find(word) != -1:\n return True\n return False\n\nresult = check_contains(query, word)"}, + { + "comment":"Write an algorithm in Python 3 that calculates the factorial of a number.", + "code_snippet":"def factorial(n):\n if n == 0:\n return 1\n else:\n return n * factorial(n-1)" + }, + { + "comment":"Develop a Python function to predict the class labels of a given set of data.", + "code_snippet":"# Load the dataset\nimport pandas as pd\ncsv_url = 'http:\/\/test.test\/test.csv'\ndf = pd.read_csv(csv_url)\n\n# Split the dataset into features and label\nX = df.iloc[:, 0:4].values\ny = df.iloc[:, 4].values\n\n# Import the ML model\nfrom sklearn.tree import DecisionTreeClassifier\n\n# Create and fit the model\nmodel = DecisionTreeClassifier()\nmodel.fit(X, y)\n\n# Predict the label\ndef predict(data):\n predicted_label = model.predict([data])\n return predicted_label"}, + { + "comment":"Develop a Python program to generate random passwords that consist of 8 characters.", + "code_snippet":"import string\nimport random\n\ndef generate_password():\n chars = string.ascii_letters + string.digits\n return ''.join(random.choice(chars) for _ in range(8))\n\npassword = generate_password()\nprint(password)"}, + { + "comment":"Generate a list using a while loop in Python and appends the current number to the list.", + "code_snippet":"my_list = []\n\nnum = 0\n\nwhile num < 10:\n my_list.append(num)\n 
num += 1\n\nprint(my_list)"}, + { + "comment":"Write a Python program to query Twitter to get the most recent tweets from a user.", + "code_snippet":"import tweepy\n\n# Authenticate to Twitter\nauth = tweepy.OAuthHandler(\"CONSUMER_KEY\", \"CONSUMER_SECRET\")\nauth.set_access_token(\"ACCESS_TOKEN\", \"ACCESS_TOKEN_SECRET\")\n# Create API object\napi = tweepy.API(auth)\n\n# Get the most recent tweets from the given user\ntweets = api.user_timeline(screen_name=\"@elonmusk\")\n\nfor tweet in tweets:\n print(tweet.text)" + }, + { + "comment":"Create a Python neural network model that takes in a given trajectory of stock prices and predicts the stock price for the next day.", + "code_snippet":"import numpy as np\nimport tensorflow as tf\n\ndef create_model():\n model = tf.keras.Sequential([\n tf.keras.layers.Dense(64, activation='relu', input_shape=(7, )),\n tf.keras.layers.Dense(1)\n ])\n\n model.compile(loss='mean_squared_error', \n optimizer='adam',\n metrics=['accuracy'])\n\n return model\n \nif __name__ == \"__main__\":\n model = create_model()\n\n input_data = np.array([[10, 10.4, 10.5, 10.6, 10.7, 10.9, 11]])\n predictions = model.predict(input_data)\n\n print(f\"Predicted stock price for the next day is: {predictions[0][0]}\")" + }, + { + "comment":"Create a way to encrypt a message using a key in Python.", + "code_snippet":"import hashlib\n\ndef encrypt(message, key):\n encrypted_msg = \"\"\n for char in message:\n encrypted_msg += chr(ord(char) + ord(key[0]))\n return encrypted_msg\n\nencrypted_msg = encrypt(message, key)\nprint(encrypted_msg)" + }, + { + "comment":"Create a function in Python that takes two parameters and prints out the larger of them.", + "code_snippet":"def printMax(parameter_1, parameter_2):\n if parameter_1 > parameter_2:\n print(parameter_1)\n else:\n print(parameter_2)" + }, + { + "comment":"Rewrite the following Python code by introducing list comprehensions.", + "code_snippet":"even_numbers = [i for i in range(1, 21) if i % 2 == 0]" + }, + { + "comment":"Develop an algorithm in Python to print Fibonacci series up to a given number.", + "code_snippet":"def fibonacci(n): \n a = 0\n b = 1\n if n < 0: \n print(\"Incorrect input\") \n elif n == 0: \n return a \n elif n == 1: \n return b \n else: \n for i in range(2,n): \n c = a + b \n a = b \n b = c \n return b \n \nn = int(input(\"Enter a number: \"))\nprint(fibonacci(n))" + }, + { + "comment":"Make the following code snippet into a function in Python3 capable of solving Any Sudoku puzzle", + "code_snippet":"def solve_sudoku(board): \n \n # Utility function to find vacant cells \n # Returns a boolean \n def find_vacant_cell(board, l): \n for row in range(9): \n for col in range(9): \n if board[row][col] == 0: \n l[0] = row \n l[1] = col \n return True\n return False\n \n # Utility function to check if a number \n # is not currently placed in a particular \n # row, column and block \n def is_safe(board, row, col, num): \n \n # Check if 'num' is not already placed \n # in current row, current column \n # and current 3x3 box \n for i in range(9): \n \n # Check the rows\n if(board[row][i] == num): \n return False\n \n # Check the columns \n if(board[i][col] == num): \n return False\n \n # Check the 3x3 boxes \n if(board[(row\/\/3)*3 + i\/\/3][(col\/\/3)*3 + i%3] == num): \n return False\n \n return True\n \n # Solve the sudoku \n def solve(board): \n # 'l' is a list variable that keeps \n # the record of row and col in \n # find_vacant_cell function \n l = [0, 0] \n \n # If there is no unassigned location, \n # we 
are done \n if (not find_vacant_cell(board, l)): \n return True\n \n # Assigning the list values\n row = l[0] \n col = l[1] \n \n # consider digits 1 to 9 \n for num in range(1, 10): \n \n # if it is a safe position\n if (is_safe(board, row, col, num)): \n \n # make tentative assignment \n board[row][col] = num \n \n # return if succcess\n if (solve(board)): \n return True\n \n # failure, unmake and try again \n board[row][col] = 0\n \n # trigger backtracking \n return False\n \n #Driver Program \n if (solve(board)): \n for row in board: \n print(row) \n else: \n print(\"No solution\")" + }, + { + "comment":"Write a function in Python to calculate the sum of the digits in a given number.", + "code_snippet":"def sum_digits(num):\n total = 0\n while num > 0:\n digit = num % 10\n total += digit\n num = num \/\/ 10\n return total" + }, + { + "comment":"Take the list of people below and create a Python dictionary with the names as keys and the ages as values.", + "code_snippet":"people_dict = {'John': 20, 'Jane': 40, 'Alex': 18, 'Peter': 45}" + }, + { + "comment":"Develop a Python program to clearly understand the concept of recursion.", + "code_snippet":"# This program is to understand recursion in Python.\n\n# Define a recursive function to calculate the factorial of a number\ndef factorial(n):\n if n == 1:\n return 1\n else:\n return n * factorial(n - 1)\n\n# Calculate factorial of number 5\nresult = factorial(5)\nprint(\"Factorial of 5 is \", result) \n\n# Output: Factorial of 5 is 120" + }, + { + "comment":"Generate a random 6-digits number in Python.", + "code_snippet":"import random\n\nnumber = random.randint(100000, 999999)\nprint(number)"}, + { + "comment":"Create a sorting algorithm in Python to sort strings by their length.", + "code_snippet":"def sort_by_length(strings):\n for i in range(len(strings) - 1):\n for j in range(i + 1, len(strings)):\n if len(strings[i]) > len(strings[j]):\n strings[i], strings[j] = strings[j], strings[i]\n return strings\n\nunsorted_list = [\"cat\", \"dog\", \"apple\", \"banana\"]\n\nsorted_list = sort_by_length(unsorted_list)\n\nprint(sorted_list)" + }, + { + "comment":"Write a Python function to get the maximum and minimum values in a given list.", + "code_snippet":"def get_max_min(lst):\n min_value = lst[0]\n max_value = lst[0]\n \n for i in range(1, len(lst)):\n if lst[i] > max_value:\n max_value = lst[i]\n if lst[i] < min_value:\n min_value = lst[i]\n \n return (min_value, max_value)" + }, + { + "comment":"Create a for loop in Python to iterate over a list of numbers and print only the even numbers.", + "code_snippet":"for num in my_list:\n if num % 2 == 0:\n print(num)" + }, + { + "comment":"Can you suggest a pure python alternative to declaring a Float32 array with numbers from 0 to 10?", + "code_snippet":"myArray = [float(i) for i in range(0,11)]" + }, + { + "comment":"Construct a neural network model in Python to classify the MNIST data set correctly.", + "code_snippet":"import keras\n\n# Input layer\ninput_layer = keras.layers.Input(shape=(784,))\n\n# Hidden layer\nhidden_layer = keras.layers.Dense(128, \n activation='relu')(input_layer)\n\n# Output layer\noutput_layer = keras.layers.Dense(10, \n activation='softmax')(hidden_layer)\n\n# Model\nmodel = keras.Model(inputs=input_layer, outputs=output_layer)\n\n# Compile model\nmodel.compile(optimizer='adam', \n loss='sparse_categorical_crossentropy',\n metrics=['accuracy'])" + }, + { + "comment": "In Python, create a function to find the maximum value within a nested dictionary.", + 
"code_snippet": "def findMaxValue(dict):\n max_val = None\n for key, value in dict.items():\n if isinstance(value, dict):\n max_val = max(findMaxValue(value), max_val)\n else:\n max_val = max(value, max_val)\n return max_val\n\nprint(findMaxValue(sample_dict))" + }, + { + "comment": "Write a function in Python that returns an object of the following time and date: Sunday March 7 5:30 PM.", + "code_snippet": "from datetime import datetime\n\ndef get_date_time():\n dt = datetime(year=2021, month=3, day=7, hour=17, minute=30)\n return dt\n\ndate_time = get_date_time()\nprint(date_time) # will output 2021-03-07 17:30:00" + }, + { + "comment": "Generate a Python program to perform matrix multiplication on two given matrices.", + "code_snippet": "def matrix_multiplication(A, B): \n rows_A = len(A) \n cols_A = len(A[0]) \n rows_B = len(B) \n cols_B = len(B[0]) \n result = [[0 for i in range(cols_B)] for j in range(rows_A)] \n for i in range(rows_A): \n for j in range(cols_B): \n for k in range(cols_A): \n result[i][j] += A[i][k] * B[k][j] \n return result" + }, + { + "comment": "Write a Python script to read an array from the user and then determine the mean and median of the array.", + "code_snippet": "def mean_median(arr): \n arr_sum = 0 \n for i in range(len(arr)): \n arr_sum += arr[i] \n \n mean = arr_sum/len(arr) \n \n arr = sorted(arr) \n if len(arr) % 2 != 0: \n median = arr[floor(len(arr)/2)] \n else: \n median = (arr[len(arr)//2] + arr[len(arr)//2 - 1]) / 2\n \n return mean, median\n\nmean, median = mean_median(arr) \nprint(\"Mean =\", mean) \nprint(\"Median =\", median)" + }, + { + "comment": "Create an algorithm in Python to check whether an array has even or odd number of elements.", + "code_snippet": "def is_array_even(arr): \n if len(arr)%2 == 0:\n return True\n else:\n return False" + }, + { + "comment": "Implement a Python function to calculate the Gini coefficient of a given dataset.", + "code_snippet": "def gini(x):\n \"\"\"Calculate Gini Coefficient of a given dataset.\"\"\"\n # calculate the frequency of each item in x\n x_value_counts = x.value_counts().sort_index() \n\n # calculate the cumulative sum of x_value_counts\n x_cum_sum = x_value_counts.cumsum()\n\n # calculate the Lorenz values\n n = len(x)\n lorenz = x_cum_sum / n\n lorenz = np.insert(lorenz, 0, 0) # add 0 to the beginning of array\n\n # calculate the Gini Coefficient\n width = lorenz[:-1] - lorenz[1:]\n gini_coef = (1 / n) * sum(width * (lorenz[1:] + lorenz[:-1]))\n\n return gini_coef\n\nif __name__ == '__main__':\n x = np.array([2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,21])\n gini_coef = gini(x)\n print(gini_coef) # should print 0.605" + }, + { + "comment": "Generate a neural network model with TensorFlow and Python for classifying hand-written digits from the MNIST dataset.", + "code_snippet": "import tensorflow as tf\n\n# Import MNIST data\nfrom tensorflow.examples.tutorials.mnist import input_data\nmnist = input_data.read_data_sets(\"/tmp/data\", one_hot=True)\n\n# Parameters\nlearning_rate = 0.001\ntraining_epochs = 15\nbatch_size = 100\ndisplay_step = 1\n\n# Network Parameters\nn_hidden_1 = 256 # 1st layer number of neurons\nn_hidden_2 = 256 # 2nd layer number of neurons\nn_input = 784 # MNIST data input (img shape: 28*28)\nn_classes = 10 # MNIST total classes (0-9 digits)\n\n# tf Graph input\nX = tf.placeholder(\"float\", [None, n_input])\nY = tf.placeholder(\"float\", [None, n_classes])\n\n# Store layers weight & bias\nweights = {\n 'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),\n 'h2': 
tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),\n 'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))\n}\nbiases = {\n 'b1': tf.Variable(tf.random_normal([n_hidden_1])),\n 'b2': tf.Variable(tf.random_normal([n_hidden_2])),\n 'out': tf.Variable(tf.random_normal([n_classes]))\n}\n\n\n# Create model\ndef multilayer_perceptron(x):\n # Hidden fully connected layer with 256 neurons\n layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])\n # Hidden fully connected layer with 256 neurons\n layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])\n # Output fully connected layer with a neuron for each class\n out_layer = tf.matmul(layer_2, weights['out']) + biases['out']\n return out_layer\n\n# Construct model\nlogits = multilayer_perceptron(X)\n\n# Define loss and optimizer\nloss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(\n logits=logits, labels=Y))\noptimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)\ntrain_op = optimizer.minimize(loss_op)\n\n# Initializing the variables\ninit = tf.global_variables_initializer()\n\n# Train model\nmodel = tf.Session()\nmodel.run(init)\n\n# Training cycle\nfor epoch in range(training_epochs):\n avg_cost = 0.\n total_batch = int(mnist.train.num_examples/batch_size)\n # Loop over all batches\n for i in range(total_batch):\n batch_x, batch_y = mnist.train.next_batch(batch_size)\n # Run optimization op (backprop) and cost op (to get loss value)\n _, c = model.run([train_op, loss_op], feed_dict={X: batch_x,\n Y: batch_y})\n # Compute average loss\n avg_cost += c / total_batch\n # Display logs per epoch step\n if epoch % display_step == 0:\n print(\"Epoch:\", '%04d' % (epoch+1), \"cost=\", \\\n \"{:.9f}\".format(avg_cost))\nprint(\"Optimization Finished!\")\n\n# Test model\npred = tf.nn.softmax(logits) # Apply softmax to logits\ncorrect_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))\n# Calculate accuracy\naccuracy = tf.reduce_mean(tf.cast(correct_prediction, \"float\"))\nprint(\"Accuracy:\", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}, model))" + }, + { + "comment": "Write a function to generate a prime number sequence using Python.", + "code_snippet": "def generate_prime_numbers(n): \n primes = [True for i in range(n+ 1)] \n p = 2\n while (p * p <= n): \n if (primes[p] == True): \n for i in range(p * p, n + 1, p): \n primes[i] = False\n p += 1\n primes[0] = False\n primes[1] = False\n for p in range(n + 1): \n if primes[p]: \n print(p)" + }, + { + "comment": "Write a Python program that removes duplicates from a list.", + "code_snippet": "def remove_duplicates(my_list):\n return list(dict.fromkeys(my_list))\n\nmy_list = [1, 2, 3, 4, 3, 2, 1]\n\nprint(remove_duplicates(my_list)) # output [1, 2, 3, 4]" + }, + { + "comment": "Generate a Python script to sort the given list of words.", + "code_snippet": "# function to sort the given list of words\ndef sort_words(words):\n # sort the list of words in alphabetical order\n words.sort()\n\n # print the sorted list of words\n print(words)\n\n# driver code\nwords = ['bubble', 'selection', 'quicksort', 'insertion']\nsort_words(words)\n\n# output\n['bubble', 'insertion', 'quicksort', 'selection']" + }, + { + "comment": "Generate a function for calculating the average of a list of numbers in Python.", + "code_snippet": "def avg_list(lst):\n sum = 0\n count = 0\n for i in lst:\n sum += i\n count += 1\n avg = sum / count\n return avg" + }, + { + "comment": "Generate a Python program that compares two binary trees to check if they are similar.", 
+ "code_snippet": "class TreeNode:\n def __init__(self, x):\n self.val = x\n self.left = None\n self.right = None\n\ndef are_similar(root1, root2):\n if root1 is None and root2 is None:\n return True\n if (root1 and root2) is None or (root1 or root2) is None:\n return False\n if root1.val != root2.val:\n return False\n else:\n return are_similar(root1.left, root2.left) and are_similar(root1.right, root2.right)" + }, + { + "comment": "Create a program for solving a quadratic equation in Python.", + "code_snippet": "import math\n \ndef solveQuadratic(a, b, c): \n discriminant = (b**2) - (4*a*c);\n \n # If discriminant is negative there are no real roots.\n if (discriminant < 0): \n return {\n \"status\": false,\n \"roots\": []\n }\n # If discriminant is zero, there is only one real roots.\n elif (discriminant == 0):\n x = -b/(2*a)\n return {\n \"status\": true,\n \"roots\": [x]\n }\n # Otherwise there are 2 real roots \n else:\n x1 = (-b + math.sqrt(discriminant)) / (2*a); \n x2 = (-b - math.sqrt(discriminant)) / (2*a); \n return {\n \"status\": true,\n \"roots\": [x1, x2]\n }\n\na = 1\nb = 3\nc = -4\n\nprint(solveQuadratic(a, b, c)) #{'status': True, 'roots': [-4.0, 1.0]}" + }, + { + "comment": "Construct a Python code for adding two matrices.", + "code_snippet": "def add_matrix(matrix1, matrix2):\n if len(matrix1) != len(matrix2):\n return 'Matrices not compatible'\n if len(matrix1[0]) != len(matrix2[0]):\n return 'Matrices not compatible'\n \n rows = len(matrix1)\n cols = len(matrix1[0])\n result = [[sum(row) for row in zip(*size)] for size in zip(matrix1, matrix2)]\n \n return result" + }, + { + "comment": "Generate a Python program which takes a string as input and prints all of its substrings in order from longest to shortest length.", + "code_snippet": "def substring_in_order(string): \n result = [] \n n = len(string) \n for l in range(n, 0, -1): \n for i in range(n-l + 1): \n result.append(string[i:i+l]) \n return result \n\nstring = input(\"Enter the string: \") \nprint (\"All substrings in order from longest to shortest:\") \nprint (substring_in_order(string))" + }, + { + "comment": "Generate a Python function which takes an array and a value and returns the index at which this value is present in the array.", + "code_snippet": "def find_index(arr, x):\n for i in range(len(arr)):\n if arr[i] == x:\n return i\n return -1\n\nprint(find_index(arr, x))" + }, + { + "comment": "Develop a Python program to find the common elements in two lists.", + "code_snippet": "common_elements = [] \n \n# using loop \nfor i in [3, 9, 15, 21, 27]: \n for j in [21, 7, 13, 29, 31]: \n if i == j: \n common_elements.append(i) \n \n# printing common elements \nprint(common_elements) \n# Output: [21]" + }, + { + "comment": "Create a Python program that takes in two strings and returns an integer indicating the number of common substrings between the two strings.", + "code_snippet": "def num_common_substrings(s1, s2):\n count = 0\n for i in range(len(s1)):\n for j in range(len(s2)):\n if s1[i] == s2[j]:\n count += 1\n return count\n\nresult = num_common_substrings(\"abcd\", \"bae\")\nprint(result)" + }, + { + "comment": "Generate a Python program to convert a string to its lowercase version.", + "code_snippet": "def str_to_lower(string):\n new_string = \"\"\n\n for char in string:\n new_string += char.lower()\n\n return new_string" + }, + { + "comment": "Can you generate a function in Python that shuffles a given list?", + "code_snippet": "import random\n\ndef shuffle_list(list): \n # Initialize the random 
number generator\n    random.seed()\n\n    # Shuffle the list\n    random.shuffle(list) \n\n    # Return the list\n    return list" + }, + { + "comment": "Write a Python function to validate a list of emails. The function should return a boolean indicating whether a given email address is valid.", + "code_snippet": "import re\n\ndef validate_mail_list(mail_list):\n    for mail in mail_list:\n        if not re.match(\"[^@]+@[^@]+\\.[^@]+\", mail):\n            return False\n    return True" + }, + { + "comment": "Create a Python script that will perform several simple math operations. The script should accept two integers as inputs and output the results of the following operations: addition, subtraction, multiplication, division, and modulo.", + "code_snippet": "int1 = 10\nint2 = 5\n\naddition = int1 + int2\nprint('Addition:', addition)\n\nsubtraction = int1 - int2\nprint('Subtraction:', subtraction)\n\nmultiplication = int1 * int2\nprint('Multiplication:', multiplication)\n\ndivision = int1 / int2\nprint('Division:', division)\n\nmodulo = int1 % int2\nprint('Modulo:', modulo)" + }, + { + "comment": "Create a Python program to perform sentiment analysis on product reviews. The program should use natural language processing techniques like tokenization and n-grams to generate features for sentiment analysis and use a classification model to assign sentiments to the reviews.", + "code_snippet": "import numpy as np\nimport pandas as pd\nimport nltk\nfrom nltk.tokenize import word_tokenize\nimport matplotlib.pyplot as plt\nfrom nltk.corpus import stopwords\nfrom nltk.stem import WordNetLemmatizer \nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn.metrics import classification_report\n\n# Read in the data\ndata = pd.read_csv('reviews.csv')\n\n# Tokenise the texts\ndata['tokenised'] = data['review'].apply(word_tokenize)\n\n# Remove stopwords\nstop_words = set(stopwords.words('english'))\ndata['filtered'] = data['tokenised'].apply(lambda x: [val for val in x if val not in stop_words])\n\n# Normalize words\nwordnet_lemmatizer = WordNetLemmatizer()\ndata['normalised'] = data['filtered'].apply(lambda x: [wordnet_lemmatizer.lemmatize(val) for val in x])\n\n# Generate features using CountVectorizer (kept as a standalone array, since a 2-D array cannot be stored in a single DataFrame column)\ncv = CountVectorizer()\nfeatures = cv.fit_transform(data['normalised'].astype('str')).toarray()\n\n# Split into training and test sets\nX = features\ny = data['sentiment']\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# Train model\nmodel = MultinomialNB(alpha=0.1)\nmodel.fit(X_train, y_train)\n\n# Predict using the test set\npredictions = model.predict(X_test)\n\n# Generate a classification report\nreport = classification_report(y_test, predictions)\nprint(report)" + }, + { + "comment": "Write a python program to clean up a list of emails and put them into a new list.", + "code_snippet": "original_list = ['bob@example.com', 'alice+bob@cafe.com', 'bob@gmail.com.uk', 'bob+alice@example']\n\nclean_list = []\n\nfor email in original_list:\n    # strip any +suffix from the local part and any .uk suffix from the domain\n    local, domain = email.split('@', 1)\n    local = local.split('+')[0]\n    domain = domain.split('.uk')[0]\n    clean_list.append(local + '@' + domain)\n\nprint(clean_list) # prints ['bob@example.com', 'alice@cafe.com', 'bob@gmail.com', 'bob@example']" + }, + { + "comment": "Generate sample data for a web application using Python's faker library. 
The output should contain three columns of data: name, email, and age.", + "code_snippet": "from faker import Faker\n\nfake = Faker()\nfor _ in range(10):\n    print(fake.name(), fake.email(), fake.random_int(min=18, max=60))" + }, + { + "comment": "Create a Python algorithm to classify positive and negative sentiment in tweets.", + "code_snippet": "def sentiment_analysis(tweets):\n    # Initialize a list to store the result \n    results = [] \n\n    # Iterate through each tweet\n    for tweet in tweets:\n        # Clean the tweet \n        tweet = preprocessing(tweet)\n\n        # Vectorize the tweet and obtain the prediction \n        tweet_vector = vectorize(tweet)\n        prediction = predict_sentiment(tweet_vector)\n\n        # Append the prediction to the result list\n        if prediction == 1:\n            results.append(\"Positive\")\n        elif prediction == 0:\n            results.append(\"Negative\")\n\n    return results" + }, + { + "comment": "Parse the given URL and print out the protocol, domain name, and path using Python.", + "code_snippet": "import urllib.parse\n\nurl = 'https://www.example.com/path/to/file'\nparsed_url = urllib.parse.urlparse(url)\n\n# Get the protocol\nprotocol = parsed_url.scheme\n\n# Get the domain name\ndomain_name = parsed_url.netloc\n\n# Get the path\npath = parsed_url.path\n\n# Print the results\nprint('Protocol:', protocol)\nprint('Domain name:', domain_name)\nprint('Path:', path)\n\n# Output:\n# Protocol: https\n# Domain name: www.example.com\n# Path: /path/to/file" + }, + { + "comment": "Write a recursive Python function to generate Fibonacci numbers.", + "code_snippet": "def fibonacci(n):\n    if n == 1 or n == 2:\n        return 1\n    else:\n        return fibonacci(n - 1) + fibonacci(n - 2)\n \nprint(fibonacci(10))" + }, + { + "comment": "Create a Kubernetes pod setup that publishes a tweet every 5 minutes.", + "code_snippet": "apiVersion: v1\nkind: Pod\nmetadata:\n  name: tweet-publisher\nspec:\n  containers:\n  - name: tweet-publisher\n    image: \n    command:\n    - sh\n    - -c\n    - 'crontab -lcrontab && while true; do python publish-tweet.py && crontab -l | { cat; echo \"*/5 * * * * python publish-tweet.py\"; } | crontab -; sleep 31622400; done'\n    volumeMounts:\n    - mountPath: /crontab/crontab\n      subPath: crontab\n      name: cronmount\n  volumes:\n  - name: cronmount\n    configMap:\n      name: tweet-cron\n---\napiVersion: v1\nkind: ConfigMap\nmetadata:\n  name: tweet-cron\ndata:\n  crontab: \"* * * * * python publish-tweet.py\"" + }, + { + "comment": "Write a code to generate a two-dimensional array with zeros shapes in Python", + "code_snippet": "\"\"\"\nWrite a code to generate a two-dimensional array with zeros shapes in Python\n\"\"\"\n\ndef generate_zeros_array(row,col):\n    return [[0] * col for _ in range(row)]\n\n# Example\nrow = 3\ncol = 3\n\narray = generate_zeros_array(row, col)\nprint(array)\n# Output: [[0, 0, 0], [0, 0, 0], [0, 0, 0]]" + }, + { + "comment": "Write a Python code to create a dictionary which contains a data structure in the form of [x:x^2] where x is the key and x^2 is the value.", + "code_snippet": "numbers = [1, 2, 3, 4, 5] \n\ndict_numbers = {num: num**2 for num in numbers}\nprint(dict_numbers)" + }, + { + "comment": "Create a Python script to print the numbers from 1 to 50, but for multiples of 3 print \"Fizz\" instead of the number, for multiples of 5 print \"Buzz\" instead of the number and for multiples of both 3 and 5 print \"FizzBuzz\".", + "code_snippet": "for i in range(1, 51):\n    if i % 3 == 0 and i % 5 == 0:\n        print(\"FizzBuzz\")\n    elif i % 3 == 0:\n        print(\"Fizz\")\n    elif i % 5 == 0:\n        print(\"Buzz\")\n    else:\n        print(i)" + }, + { + 
"comment": "Create a Python program to calculate the square root of a user-defined positive number.", + "code_snippet": "import math\n\nn = 100\n\n#Calculate the square root of a number\nsquare_root = math.sqrt(n)\n\nprint(f'Square root of {n} is {square_root}')" + }, + { + "comment": "Create a Python class called Rectangle to represent a two-dimensional rectangle with a length and width.", + "code_snippet": "class Rectangle:\n def __init__(self, length, width):\n self.length = length\n self.width = width\n\n def area(self):\n return self.length * self.width" + }, + { + "comment": "Create a data visualization dashboard in Python using Matplotlib.", + "code_snippet": "import matplotlib\nimport matplotlib.pyplot as plt\n\n# Generate the data for the dashboard\nx = [1,2,3,4,5]\ny = [10,25,30,50,80]\n\n# Create the figure and set the size\nfig = plt.figure(figsize=(10,6))\n\n# Configure the plot\nplt.plot(x, y, color='green', marker='o', linestyle='dashed', linewidth=2, markersize=12)\nplt.title(\"My Data Visualization Dashboard\")\nplt.xlabel('X-axis')\nplt.ylabel('Y-axis')\n\n# Show the plot\nplt.show()" + }, + { + "comment": "Edit the following python code to prompt the user to enter the value of radius to calculate the area of a circle.", + "code_snippet": "# Calculate area of a circle\n\nPi = 3.14\n\n# Get radius from user\nradius = float(input(\"Please enter the radius of the circle: \"))\n\n# Calculate the circle area\narea = Pi*radius**2\n\n# Print the area\nprint(\"The area of the circle is:\", area)" + }, + { + "comment": "Create a Python algorithm to identify the longest word in a sentence.", + "code_snippet": "def find_longest_word(sentence):\n words = sentence.split()\n # initialise longest_word variable with 0\n longest_word = 0\n for word in words:\n if len(word) > longest_word:\n longest_word = len(word)\n longest_word_str = word\n return longest_word_str" + }, + { + "comment": "Write a Python script that can convert degrees Fahrenheit to degrees Celsius.", + "code_snippet": "def FahrenheitToCelsius(temp_fahrenheit):\n temp_celsius = (temp_fahrenheit - 32) * 5/9\n return temp_celsius" + }, + { + "comment": "Generate a Python function that checks if a given number is an Armstrong number.", + "code_snippet": "def is_armstrong_number(num): \n n = len(str(num)) \n # initialize sum \n temp = num \n sum_of_digits = 0\n \n # find sum of nth power of individual digits \n while temp > 0: \n digit = temp % 10 \n sum_of_digits += digit ** n \n temp //= 10 \n \n # if num is equal to sum_of_digits then the number is an Armstrong number \n if num == sum_of_digits: \n return True \n \n else: \n return False" + }, + { + "comment": "Write a code in Python to print the following pattern:", + "code_snippet": "for i in range(5):\n for j in range(i+1):\n print(\"*\", end=\"\")\n print(\"\")\n\nfor i in range(5, 0, -1):\n for j in range(i):\n print(\"*\", end=\"\")\n print(\"\")" + }, + { + "comment": "Program a web scraper in Python to extract the latest stock prices from specific websites.", + "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\n\n# input\nstocks = [\"AAPL stocks\", \"MSFT stocks\", \"FB stocks\", \"GOOG stocks\"]\n\n# process\ndef get_stock_price(stock):\n url = f\"https://finance.yahoo.com/quote/{stock}\"\n response = requests.get(url)\n soup = BeautifulSoup(response.text, 'html.parser')\n price = soup.find('div', {'class': 'My(6px) Pos(r) smartphone_Mt(6px)'}).find('span').text\n return price\n\n# output\nfor stock in stocks:\n price = get_stock_price(stock)\n 
print(f'The price of {stock} is {price}.')" + }, + { + "comment": "The code provides Python wrappers around TensorFlow ops for decoding LibSVM input to tensors. To use it, call the `decode_libsvm` function with the following arguments: `input` (a `Tensor` of type `string`), `num_features` (an `int` that is `>= 1`), `dtype` (an optional `tf.DType` from: `tf.float32, tf.float64, tf.int32, tf.int64`. Defaults to `tf.float32`), `label_dtype` (an optional `tf.DType` from: `tf.float32, tf.float64, tf.int32, tf.int64`. Defaults to `tf.int64`), and `name` (a name for the operation, optional). The function returns a tuple of `Tensor` objects `(label, feature_indices, feature_values, feature_shape)`, where `label` is a `Tensor` of type `label_dtype`, `feature_indices` is a `Tensor` of type `int64`, `feature_values` is a `Tensor` of type `dtype`, and `feature_shape` is a `Tensor` of type `int64`.", + "code_snippet": "\"\"\"Python wrappers around TensorFlow ops.\n\nThis file is MACHINE GENERATED! Do not edit.\nOriginal C++ source file: libsvm_ops.cc\n\"\"\"\n\nimport collections as _collections\nimport six as _six\n\nfrom tensorflow.python import pywrap_tensorflow as _pywrap_tensorflow\nfrom tensorflow.python.eager import context as _context\nfrom tensorflow.python.eager import core as _core\nfrom tensorflow.python.eager import execute as _execute\nfrom tensorflow.python.framework import dtypes as _dtypes\nfrom tensorflow.python.framework import errors as _errors\nfrom tensorflow.python.framework import tensor_shape as _tensor_shape\n\nfrom tensorflow.core.framework import op_def_pb2 as _op_def_pb2\n# Needed to trigger the call to _set_call_cpp_shape_fn.\nfrom tensorflow.python.framework import common_shapes as _common_shapes\nfrom tensorflow.python.framework import op_def_registry as _op_def_registry\nfrom tensorflow.python.framework import ops as _ops\nfrom tensorflow.python.framework import op_def_library as _op_def_library\nfrom tensorflow.python.util.tf_export import tf_export\n\n\n_decode_libsvm_outputs = [\"label\", \"feature_indices\", \"feature_values\",\n \"feature_shape\"]\n_DecodeLibsvmOutput = _collections.namedtuple(\n \"DecodeLibsvm\", _decode_libsvm_outputs)\n\n\n@tf_export('decode_libsvm')\ndef decode_libsvm(input, num_features, dtype=_dtypes.float32, label_dtype=_dtypes.int64, name=None):\n r\"\"\"Convert LibSVM input to tensors. The output consists of\n\n a label and a feature tensor. The shape of the label tensor\n is the same as input and the shape of the feature tensor is\n `[input_shape, num_features]`.\n\n Args:\n input: A `Tensor` of type `string`. Each string is a record in the LibSVM.\n num_features: An `int` that is `>= 1`. The number of features.\n dtype: An optional `tf.DType` from: `tf.float32, tf.float64, tf.int32, tf.int64`. Defaults to `tf.float32`.\n label_dtype: An optional `tf.DType` from: `tf.float32, tf.float64, tf.int32, tf.int64`. Defaults to `tf.int64`.\n name: A name for the operation (optional).\n\n Returns:\n A tuple of `Tensor` objects (label, feature_indices, feature_values, feature_shape).\n\n label: A `Tensor` of type `label_dtype`. A tensor of the same shape as input.\n feature_indices: A `Tensor` of type `int64`. A 2-D int64 tensor of dense_shape [N, ndims].\n feature_values: A `Tensor` of type `dtype`. A 1-D tensor of any type and dense_shape [N].\n feature_shape: A `Tensor` of type `int64`. 
A 1-D int64 tensor of dense_shape [ndims].\n \"\"\"\n _ctx = _context._context\n if _ctx is None or not _ctx._eager_context.is_eager:\n num_features = _execute.make_int(num_features, \"num_features\")\n if dtype is None:\n dtype = _dtypes.float32\n dtype = _execute.make_type(dtype, \"dtype\")\n if label_dtype is None:\n label_dtype = _dtypes.int64\n label_dtype = _execute.make_type(label_dtype, \"label_dtype\")\n _, _, _op = _op_def_lib._apply_op_helper(\n \"DecodeLibsvm\", input=input, num_features=num_features, dtype=dtype,\n label_dtype=label_dtype, name=name)\n _result = _op.outputs[:]\n _inputs_flat = _op.inputs\n _attrs = (\"dtype\", _op.get_attr(\"dtype\"), \"label_dtype\",\n _op.get_attr(\"label_dtype\"), \"num_features\",\n _op.get_attr(\"num_features\"))\n _execute.record_gradient(\n \"DecodeLibsvm\", _inputs_flat, _attrs, _result, name)\n _result = _DecodeLibsvmOutput._make(_result)\n return _result\n\n else:\n try:\n _result = _pywrap_tensorflow.TFE_Py_FastPathExecute(\n _ctx._context_handle, _ctx._eager_context.device_name, \"DecodeLibsvm\",\n name, _ctx._post_execution_callbacks, input, \"dtype\", dtype,\n \"label_dtype\", label_dtype, \"num_features\", num_features)\n _result = _DecodeLibsvmOutput._make(_result)\n return _result\n except _core._FallbackException:\n return decode_libsvm_eager_fallback(\n input, dtype=dtype, label_dtype=label_dtype,\n num_features=num_features, name=name, ctx=_ctx)\n except _core._NotOkStatusException as e:\n if name is not None:\n message = e.message + \" name: \" + name\n else:\n message = e.message\n _six.raise_from(_core._status_to_exception(e.code, message), None)\n\n\ndef decode_libsvm_eager_fallback(input, num_features, dtype=_dtypes.float32, label_dtype=_dtypes.int64, name=None, ctx=None):\n r\"\"\"This is the slowpath function for Eager mode.\n This is for function decode_libsvm\n \"\"\"\n _ctx = ctx if ctx else _context.context()\n num_features = _execute.make_int(num_features, \"num_features\")\n if dtype is None:\n dtype = _dtypes.float32\n dtype = _execute.make_type(dtype, \"dtype\")\n if label_dtype is None:\n label_dtype = _dtypes.int64\n label_dtype = _execute.make_type(label_dtype, \"label_dtype\")\n input = _ops.convert_to_tensor(input, _dtypes.string)\n _inputs_flat = [input]\n _attrs = (\"dtype\", dtype, \"label_dtype\", label_dtype, \"num_features\",\n num_features)\n _result = _execute.execute(b\"DecodeLibsvm\", 4, inputs=_inputs_flat,\n attrs=_attrs, ctx=_ctx, name=name)\n _execute.record_gradient(\n \"DecodeLibsvm\", _inputs_flat, _attrs, _result, name)\n _result = _DecodeLibsvmOutput._make(_result)\n return _result\n\n_ops.RegisterShape(\"DecodeLibsvm\")(None)\n\ndef _InitOpDefLibrary(op_list_proto_bytes):\n op_list = _op_def_pb2.OpList()\n op_list.ParseFromString(op_list_proto_bytes)\n _op_def_registry.register_op_list(op_list)\n op_def_lib = _op_def_library.OpDefLibrary()\n op_def_lib.add_op_list(op_list)\n return op_def_lib\n# op {\n# name: \"DecodeLibsvm\"\n# input_arg {\n# name: \"input\"\n# type: DT_STRING\n# }\n# output_arg {\n# name: \"label\"\n# type_attr: \"label_dtype\"\n# }\n# output_arg {\n# name: \"feature_indices\"\n# type: DT_INT64\n# }\n# output_arg {\n# name: \"feature_values\"\n# type_attr: \"dtype\"\n# }\n# output_arg {\n# name: \"feature_shape\"\n# type: DT_INT64\n# }\n# attr {\n# name: \"dtype\"\n# type: \"type\"\n# default_value {\n# type: DT_FLOAT\n# }\n# allowed_values {\n# list {\n# type: DT_FLOAT\n# type: DT_DOUBLE\n# type: DT_INT32\n# type: DT_INT64\n# }\n# }\n# }\n# attr {\n# name: 
\"label_dtype\"\n# type: \"type\"\n# default_value {\n# type: DT_INT64\n# }\n# allowed_values {\n# list {\n# type: DT_FLOAT\n# type: DT_DOUBLE\n# type: DT_INT32\n# type: DT_INT64\n# }\n# }\n# }\n# attr {\n# name: \"num_features\"\n# type: \"int\"\n# has_minimum: true\n# minimum: 1\n# }\n# }\n_op_def_lib = _InitOpDefLibrary(b\"\\n\\311\\001\\n\\014DecodeLibsvm\\022\\t\\n\\005input\\030\\007\\032\\024\\n\\005label\\\"\\013label_dtype\\032\\023\\n\\017feature_indices\\030\\t\\032\\027\\n\\016feature_values\\\"\\005dtype\\032\\021\\n\\rfeature_shape\\030\\t\\\"\\033\\n\\005dtype\\022\\004type\\032\\0020\\001:\\010\\n\\0062\\004\\001\\002\\003\\t\\\"!\\n\\013label_dtype\\022\\004type\\032\\0020\\t:\\010\\n\\0062\\004\\001\\002\\003\\t\\\"\\027\\n\\014num_features\\022\\003int(\\0010\\001\")\n" + }, + { + "comment": "Write a Python program to compute the sum of the values of a given dictionary", + "code_snippet": "def dict_sum(dict):\n sum = 0\n for key, value in dict.items():\n sum += value\n return sum" + }, + { + "comment": "Create a REST API in Python that can be used to delete an item from a list.", + "code_snippet": "from flask import Flask, request\nimport json\n\napp = Flask(__name__)\nitems = [\"Apple\", \"Banana\", \"Orange\"]\n\n@app.route('/items', methods=[\"GET\"])\ndef get_items():\n return json.dumps(items)\n\n@app.route('/items/', methods=[\"DELETE\"])\ndef delete_item(item):\n if item in items:\n items.remove(item)\n message = \"Item successfully deleted.\"\n else:\n message = \"Item not found.\"\n return json.dumps({\"message\": message})" + }, + { + "comment": "Read a given list of strings and create a new list of all strings that are longer than 5 characters long.", + "code_snippet": "def filter_long_strings(string_list):\n long_strings = []\n for string in string_list:\n if len(string) > 5:\n long_strings.append(string)\n return long_strings\n\nstring_list = ['Python', 'is', 'Fun']\n\nlong_strings = filter_long_strings(string_list)\nprint(long_strings)" + }, + { + "comment": "Craft an algorithm in Python to move all zeroes in a list to the end of the list.", + "code_snippet": "def move_zeroes(lst):\n # Count the number of non-zero elements\n num_non_zeroes = 0\n for num in lst:\n if num != 0:\n num_non_zeroes += 1\n \n # Initialize the new list\n new_lst = []\n \n # Move all non-zeroes to the beginning of the new list\n for i in range(num_non_zeroes):\n new_lst.append(lst[i])\n \n # Append 0's to the end\n for _ in range(len(lst)-num_non_zeroes):\n new_lst.append(0)\n \n # Return the new list\n return new_lst\n\nlst = [3, 0, 1, 0, 5, 0, 2]\nresult = move_zeroes(lst)\nprint(result)" + }, + { + "comment": "Create a Python program to implement the Fibonacci sequence.", + "code_snippet": "def fibonacci_seq(n): \n if n==0: \n return 0\n elif n==1: \n return 1\n else: \n return fibonacci_seq(n-1)+fibonacci_seq(n-2)" + }, + { + "comment": "Write a function in Python to print out an array backwards.", + "code_snippet": "def print_reverse_array(arr):\n for i in range(len(arr)-1, -1, -1): \n print(arr[i])" + }, + { + "comment": "Create a Python program to find the possible prime numbers from one to one hundred by using the Sieve of Eratosthenes algorithm.", + "code_snippet": "def sieve_eratosthenes(n): \n prime_list = [True] * (n+1) \n prime_list[0] = False\n prime_list[1] = False\n primes = [] \n \n for i in range(2, n+1): \n if prime_list[i] == True: \n primes.append(i) \n for j in range(i*i, n+1, i): \n prime_list[j] = False\n return primes \n \nn = 100\nprint(\"The Prime numbers 
from 1 to 100 are:\")\nprint(sieve_eratosthenes(n))" + }, + { + "comment": "Create a Python function named \"Transpile\" that takes in four arguments: \"src_dir\", \"dst_dir\", \"model_filename\", and \"params_filename\". Inside the function, create a CPUPlace object and an Executor object using Paddle Fluid. Then, create a Scope object using fluid.core.Scope(). Load the inference program, feed target names, and fetch targets using fluid.io.load_inference_model() with the \"src_dir\", \"exe\", \"model_filename\", and \"params_filename\" arguments. Clone the inference program and apply the inference transpiler using fluid.InferenceTranspiler() and t.transpile(). Save the optimized inference program to \"dst_dir\" using fluid.io.save_persistables() and the \"inference_transpiler_program\" and \"params_filename\" arguments. If \"dst_dir\" does not exist, create it using os.makedirs(). Finally, create a main function that takes in arguments using argparse.ArgumentParser() and calls the \"Transpile\" function with the parsed arguments.", + "code_snippet": "import os\nimport sys\nimport argparse\nimport paddle.fluid as fluid\n\n\ndef Transpile(src_dir, dst_dir, model_filename, params_filename):\n place = fluid.CPUPlace()\n exe = fluid.Executor(place)\n\n inference_scope = fluid.core.Scope()\n with fluid.scope_guard(inference_scope):\n print \"Loading inference_program from \", src_dir\n [inference_program, feed_target_names,\n fetch_targets] = fluid.io.load_inference_model(src_dir, exe, model_filename, params_filename)\n\n inference_transpiler_program = inference_program.clone()\n # NOTE: Applying the inference transpiler will change the inference_transpiler_program.\n t = fluid.InferenceTranspiler()\n # Under the with statement, inference_scope is the global scope.\n t.transpile(inference_transpiler_program, place)\n\n #print inference_transpiler_program\n\n print \"Saving the optimized inference_program to \", dst_dir\n # There is a bug in fluid.io.save_inference_model, so we can use the following code instead.\n if not os.path.isdir(dst_dir):\n os.makedirs(dst_dir)\n model_path = os.path.join(dst_dir, model_filename)\n with open(model_path, \"wb\") as f:\n f.write(inference_transpiler_program.desc.serialize_to_string())\n fluid.io.save_persistables(exe, dst_dir, inference_transpiler_program, params_filename)\n\n\ndef main():\n parser = argparse.ArgumentParser()\n parser.add_argument('--src_dir', help='Source directory of inference model')\n parser.add_argument('--dst_dir', help='Dstination directory of inference model')\n parser.add_argument('--model_filename', default=None, help='The name of model file')\n parser.add_argument('--params_filename', default=None, help='The name of params file')\n args = parser.parse_args()\n Transpile(args.src_dir, args.dst_dir, args.model_filename, args.params_filename)\n\n\nif __name__ == '__main__':\n main()\n" + }, + { + "comment": "Create a Python function called add_marks which takes two parameters: marks(a list) and mark(an integer). 
The function should add the mark to each value in the marks list.", + "code_snippet": "def add_marks(marks, mark):\n    return [m + mark for m in marks]" + }, + { + "comment": "Design a positive feedback loop for the following python program.", + "code_snippet": "for i in range(10):\n    print(\"hello\")\n    i += 1\n    if i == 10:\n        print(\"You've got it!\")" + }, + { + "comment": "Implement a function that prints out the numbers from 1 to 50 in python", + "code_snippet": "def print_numbers():\n    for i in range(1, 51):\n        print(i)\n\nprint_numbers()" + }, + { + "comment": "Write a Python program to send an automated email. The program should take a list of emails as input, iterate over the list, and send an email to each address.", + "code_snippet": "import smtplib\n\nsender_email = 'example@gmail.com'\npassword = 'example_password'\n\nfor email in recipient_emails:\n    with smtplib.SMTP('smtp.gmail.com', 587) as server:\n        server.ehlo()\n        server.starttls()\n        server.login(sender_email, password)\n\n        subject = 'Automated Email from Program'\n        message = 'Message body'\n        \n        server.sendmail(sender_email, email, 'Subject: ' + subject + '\\n\\n' + message)\n        server.quit()\n        print('Email sent successfully.')" + }, + { + "comment": "Create a Python class which represents a 3D Vector", + "code_snippet": "class Vector3D:\n    def __init__(self, x, y, z):\n        self.x = x\n        self.y = y\n        self.z = z\n    \n    def __str__(self):\n        return \"(\" + str(self.x) + \", \" + str(self.y) + \", \" + str(self.z) + \")\"" + }, + { + "comment": "Given two strings, write a Python program to find the total number of common characters present in both strings", + "code_snippet": "def num_common_chars(str1, str2):\n    common = 0\n    for c in str1:\n        if c in str2:\n            common += 1\n    return common" + }, + { + "comment": "Modify the following Python code to handle errors.", + "code_snippet": "import json\n\ndef read_data_from_json(filename):\n    try:\n        with open(filename, 'r') as f:\n            data = json.load(f)\n        return data\n    except FileNotFoundError:\n        print(\"The file specified does not exist.\")\n    except json.decoder.JSONDecodeError:\n        print(\"The file specified is not in a valid JSON format.\")" + }, + { + "comment": "Analyze a given Python code and write a function that checks if a given number is prime.", + "code_snippet": "def primeCheck(num):\n    if num > 1:\n        for i in range(2, num):\n            if (num % i) == 0:\n                return False\n    else:\n        return False\n    return True" + }, + { + "comment": "Create a python program to filter out all the even numbers from a given list.", + "code_snippet": "def filter_even_numbers(numbers):\n    even_numbers = []\n    for i in numbers:\n        if i % 2 == 0:\n            even_numbers.append(i)\n    return even_numbers\n    \n    \nfiltered = filter_even_numbers([3, 4, 5, 6, 7])\nprint(filtered)" + }, + { + "comment": "Implement a function in Python that takes a Huffman tree and drop all its leaf nodes.", + "code_snippet": "def dropLeafNodes(head): \n    \n    # If node is not None and doesn't have any children \n    if head and not head.left and not head.right: \n        return None\n    \n    # Don't change anything if head is None \n    if not head: \n        return None\n    \n    # Recursively call dropLeafNodes on left and right subtrees\n    head.left = dropLeafNodes(head.left) \n    head.right = dropLeafNodes(head.right) \n    \n    # Return head of the modified tree\n    return head" + }, + { + "comment": "Create a web crawler in Python that starts from a website and looks for other links in the page then scrapes the page content.", + "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\nimport re\n\ndef crawl(url):\n    
visited = set()\n queue = [url] \n \n while queue:\n curr = queue.pop(0)\n visited.add(curr) \n response = requests.get(curr)\n \n if response.ok:\n soup = BeautifulSoup(response.text, 'html.parser')\n\n# scrape the page here \n\n# find all the links\n links = soup.find_all('a', href=re.compile('^http')) \n \n for link in links:\n if link['href'] not in visited:\n queue.append(link['href'])" + }, + { + "comment": "Write a Python program to calculate the maximum, minimum and mean number of elements in a given string.", + "code_snippet": "def get_stats_for_string(string):\n num_elements = len(string)\n max_element = max(string)\n min_element = min(string)\n mean_element = sum(string) / num_elements\n return (max_element, min_element, mean_element)\n\nstring = \"abcdef\"\nstats_for_string = get_stats_for_string(string)\nprint(\"Max: {0}, Min: {1}, Mean: {2}\".format(*stats_for_string))" + }, + { + "comment": "Write a Python program to calculate the compound interest.", + "code_snippet": "def compoundInterest(investment_amount, rate_of_interest, time):\n compound_interest = investment_amount * (pow((1 + rate_of_interest / 100), time))\n return compound_interest\n\nci = compoundInterest(1000, 5, 5)\nprint(ci)" + }, + { + "comment": "Create a command-line application in Python that stores customer data and allows customer to query by customer name", + "code_snippet": "import sys \n \ndef print_customer_data(name): \n # Code for searching customer data by customer name and \n # printing results \n ...\n\n# Main function for the customer search application\nif __name__==\"__main__\": \n # Get command line arguments \n args = sys.argv \n \n if len(args) == 2: \n name = args[1]\n print_customer_data(name) \n else: \n print(\"Invalid arguments, please specify a customer name.\")" + }, + { + "comment": "Develop a program in Python to calculate the Fibonacci sequence.", + "code_snippet": "def fibonacci(n): \n a = 0\n b = 1\n if n < 0: \n print(\"Incorrect input\") \n elif n == 0: \n return a \n elif n == 1: \n return b \n else: \n for i in range(2,n): \n c = a + b \n a = b \n b = c \n return b \n\n# Driver Code \nprint(fibonacci(9))" + }, + { + "comment": "Generate a python program that takes two sets of integers as input and outputs the intersection of those sets.", + "code_snippet": "def intersection(set1, set2): \n return set1 & set2\n\nresult = intersection(set1, set2) \nprint(result)" + }, + { + "comment": "Design an algorithm in Python to find the smallest number in a given list.", + "code_snippet": "def smallestNumber(nums):\n smallest = nums[0]\n for i in range(1, len(nums)):\n if nums[i] < smallest:\n smallest = nums[i]\n return smallest\n\nnums = [4, 5, 6, 7, 8]\nsmallestNum = smallestNumber(nums)\nprint('The smallest number is', smallestNum)" + }, + { + "comment": "Generate a python program for printing a Pascal triangle.", + "code_snippet": "def PascalTriangle(n) : \n \n # An auxiliary array to store \n # generated pscal triangle values \n arr = [0 for i in range(n)] \n \n # Iterate through every line and \n # print integer(s) in it \n for line in range(0, n) : \n \n # Every line has number of \n # integers equal to line \n # number \n for i in range (0, line + 1) : \n \n # First and last values \n # in every row are 1 \n if (i == 0 or i == line) : \n arr[i] = 1\n \n # Other values are sum of \n # values just above and \n # left of above \n else : \n arr[i] = arr[i] + arr[i - 1] \n \n # Printing array in \n # mantainence of order \n for i in range (0, line + 1) : \n print(arr[i], end =\" 
\") \n print() \n \n# Driver Code \nn = 5\nPascalTriangle(n)" + }, + { + "comment": "Create a python script that filters out all negative numbers from the given matrix", + "code_snippet": "import numpy as np\n\ndef filter_out_negative(matrix):\n # Convert matrix to numpy array\n matrix = np.array(matrix)\n # Find all negative numbers and set them to 0\n matrix[matrix < 0] = 0\n return matrix\n\nresult = filter_out_negative([[-1, 2, -3], [-4, 5, 6], [-7, 8, 9]])\nprint(result)" + }, + { + "comment": "Create a basic REST API using Python and Flask to list all the countries in the world.", + "code_snippet": "import flask\nfrom flask import Flask, request, jsonify\nimport json\n\napp = Flask(name)\n\n@app.route('/countries', methods=['GET'])\ndef get_countries():\nwith open('data.json', 'r') as f:\n countries = json.load(f)\n return jsonify(countries)\n\nif name == 'main':\n app.run()" + }, + { + "comment": "Create a web scraper in Python that can extract hotel reviews from TripAdvisor.", + "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\n\n# Define the URL and the headers\nurl = ''\nheaders = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36'}\n\n# Make a request and get HTML in response\nresponse = requests.get(url, headers=headers)\n\n# Parse HTML\nsoup = BeautifulSoup(response.text, 'html.parser')\n\n# Find all reviews\nreviews = soup.find_all('div', {'class': 'review-container'})\n\n# Go through every review and extract the data\nfor review in reviews:\n # Get the title\n title = review.find('span', {'class': 'noQuotes'}).text\n\n # Get the text\n text = review.find('p', {'class': 'partial_entry'}).text\n\n # Print for demo\n print(title)\n print(text)\n print('-' * 50)" + }, + { + "comment": "Develop a machine learning model in Python to identify the author of a given article.", + "code_snippet": "from sklearn.feature_extraction.text import TfidfVectorizer\nfrom sklearn.naive_bayes import GaussianNB\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.model_selection import train_test_split\n\n# load data\nsamples = [\n \"sample 1 from AuthorA\",\n \"sample 2 from AuthorA\",\n \"sample 1 from AuthorB\",\n \"sample 2 from AuthorB\"\n]\nlabels = [\n \"AuthorA\",\n \"AuthorA\",\n \"AuthorB\",\n \"AuthorB\"\n]\n\n# build the model\nmodel = Pipeline([\n ('tfidf', TfidfVectorizer()),\n ('clf', GaussianNB())\n])\n\n# train the model\nX_train, X_test, y_train, y_test = train_test_split(samples, labels, test_size=0.3)\nmodel.fit(X_train, y_train)\n\n# evalulate the model\nscore = model.score(X_test, y_test)\nprint(f'model score: {score}')" + }, + { + "comment": "Write a Python program to divide a given list of numbers into two equal parts such that the difference between the sum of the two parts is minimum.", + "code_snippet": "def divide_in_two_equal_parts(lst):\n target = sum(lst) // 2\n part1 = []\n part2 = []\n left_sum = 0\n right_sum = 0\n \n for num in lst:\n if left_sum <= right_sum:\n left_sum += num\n part1.append(num)\n else:\n right_sum += num\n part2.append(num)\n \n return part1, part2\n\npart1, part2 = divide_in_two_equal_parts([2, 3, 4, 5, 6, 7])\n\nprint(\"Part 1:\", part1)\nprint(\"Part 2:\", part2)" + }, + { + "comment": "Develop an algorithm to detect and fix Python syntax errors in a given code snippet.", + "code_snippet": "def test():\n x = 5\n y = 3\n try:\n z\n except NameError:\n print(\"NameError: Name 'z' is not defined\")" + }, + { + "comment": "Design a data structure in 
Python to store integer values.", + "code_snippet": "class IntList:\n    def __init__(self):\n        self.values = []\n    \n    def add_value(self, value):\n        self.values.append(value)\n    \n    def find_min(self):\n        return min(self.values)" + }, + { + "comment": "Create an algorithm in Python for a linear search. The algorithm should take in an array and a value to search for, and return the index of the value if it is found, or -1 if it is not.", + "code_snippet": "def linearSearch(arr, value):\n    for i in range(len(arr)):\n        if arr[i] == value:\n            return i # value found, return index\n    return -1 # value not found\n\narr = [5, 3, 7, 2]\nsearchValue = 7\n\nresult = linearSearch(arr, searchValue)\n\nif result == -1:\n    print(\"Element not found\")\nelse:\n    print(\"Element present at index\", result)" + }, + { + "comment": "Given a list of strings, write a Python function to remove all the whitespaces from each string.", + "code_snippet": "def remove_whitespace(word_list):\n    return [word.strip() for word in word_list]\n\nstripped_list = remove_whitespace(['Hello World ', 'Hi there', 'Good Morning'])\nprint(stripped_list)" + }, + { + "comment": "Design a Python program to randomize a list of given integers using an optimized Fisher-Yates algorithm.", + "code_snippet": "import random\n\ndef randomized_list(num_list):\n    for i in range(len(num_list)-1,0,-1):\n        j = random.randint(0,i)\n        num_list[i],num_list[j] = num_list[j],num_list[i]\n    return num_list" + }, + { + "comment": "Create a system in Python for generating natural language responses.", + "code_snippet": "from tensorflow.keras.layers import Input, Dense, Embedding, GRU\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.utils import plot_model\n\n# Create inputs\ninputs1 = Input(shape=(20,))\nx1 = Embedding(1000, 64)(inputs1)\nx1 = GRU(128)(x1)\n\n# Create outputs\ny1 = Dense(1000, activation='softmax')(x1)\n\n# Create model\nmodel = Model(inputs=[inputs1], outputs=[y1])\n\n# Compile the model\nmodel.compile(optimizer='adam',\n    loss='categorical_crossentropy',\n    metrics=['accuracy'])\n\n# Train the model\nmodel.fit([inputs], [outputs], epochs=10, batch_size=32)\n\n# Generate the response to the sample utterance\nquery = \"What time is it?\"\nprediction = model.predict(query)\n\nprint(prediction)" + }, + { + "comment": "Create a web scraper using Python that collects the title and content from the homepage of a given website.", + "code_snippet": "import requests\nimport bs4\n\nurl = 'https://www.example.com'\nr = requests.get(url)\nhtml = bs4.BeautifulSoup(r.text,\"html.parser\")\n\n# Find title\ntitle = html.find('title')\nprint(title.text)\n\n# Find content\ncontent = html.find('div', {'class': 'content'})\nprint(content.text)" + }, + { + "comment": "Generate a Python script that finds the total surface area of a cuboid, given its three dimensions.", + "code_snippet": "def find_total_surfacearea(length, breadth, height):\n    return (2*(length*breadth + breadth*height + height*length))\n\nprint(find_total_surfacearea(2, 3, 4))\n\n# Output: 52" + }, + { + "comment": "Generate a Python program to clean a string of HTML tags.", + "code_snippet": "import re\n\n# Clean a string of HTML tags \ndef clean_html_tags(str):\n    clean_str = re.sub(\"<.*?>\", \"\", str)\n    return clean_str\n\nhtml_string = \"
<div><p>This is a test string</p></div>
\"\n\nclean_str = clean_html_tags(html_string)\n\nprint(clean_str) #This is a test string" + }, + { + "comment": "Generate a Python function that takes an integer array as input and returns the number of unique elements in the array.", + "code_snippet": "def num_unique_elements(arr):\n unique_arr = set(arr)\n return len(unique_arr)\n\nprint(num_unique_elements([1, 2, 3, 2, 1, 4, 7, 8, 9, 8]))" + }, + { + "comment": "Create a Python program to scrape an online news website to get the headlines.", + "code_snippet": "import requests\nfrom bs4 import BeautifulSoup \n\n# url of the website you want to scrape \nurl = \"https://www.news.com/\" \n\n# get the response in the form of html\nr = requests.get(url) \n\n# create a beautifulsoup object to parse contents \nsoup = BeautifulSoup(r.content, 'html5lib') \n\n# find headlines from the parsed page \nheadlines = soup.find_all('h2') \n\n# Print the headlines \nfor headline in headlines: \n print(headline.text)" + }, + { + "comment": "Edit the given Python program to print 'Happy Holidays' 10 times.", + "code_snippet": "for i in range(10):\n print('Happy Holidays')" + }, + { + "comment": "Combine 2 strings and reverse the result in Python.", + "code_snippet": "words = \"Hello \"\nwords2 = \"world!\"\n\ncombined = words + words2\nresult = combined[::-1]\n\nprint(result)" + }, + { + "comment": "Create a class to represent a bank account in Python.", + "code_snippet": "class BankAccount:\n \n def __init__(self, name, balance):\n self.name = name\n self.balance = balance\n \n def deposit(self, amount):\n self.balance += amount\n \n def withdraw(self, amount):\n if self.balance >= amount:\n self.balance -= amount\n return True\n else:\n return False\n \n def get_balance(self):\n return self.balance" + }, + { + "comment": "Optimize the following Python code to sort a set of numbers (set = [2, 6, 3, 8, 4]) in O(n) time complexity.", + "code_snippet": "def sort_list(unsorted_list):\n sorted_list = []\n while unsorted_list:\n min_elem = min(unsorted_list)\n unsorted_list.remove(min_elem)\n sorted_list.append(min_elem)\n return sorted_list" + }, + { + "comment": "Design a Python program to perform linear regression for predicting the numbers in a given dataset.", + "code_snippet": "import numpy as np\nfrom sklearn.linear_model import LinearRegression\n\nX = np.array([1,2,3,4,5]).reshape(-1,1)\nY = np.array([3,5,7,9,13])\n\nmodel = LinearRegression()\nmodel.fit(X, Y)\n\n# Predicts the value of y\nprint(model.predict([[7]]))" + }, + { + "comment": "Create a Python object that contains two attributes, \"Name\" and \"Age\".", + "code_snippet": "class Person:\n def __init__(self, name, age):\n self.name = name\n self.age = age\n\nperson = Person(\"John\", 25)" + }, + { + "comment": "Write a function in Python that takes a list of objects and returns a list of those objects sorted by one of their attributes.", + "code_snippet": "def sort_by_attr(objects, attr):\n return sorted(objects, key=lambda item: item[attr])\n \nsort_by_attr([{\"name\": \"Alice\", \"age\": 20}, {\"name\": \"Bob\", \"age\": 21}], \"age\")" + }, + { + "comment": "Write a Python program to generate an HTML table displaying information about the top 10 bestselling books in a specific genre.", + "code_snippet": "import sqlite3\n\n# connect to the database\nconn = sqlite3.connect('books.db')\nc = conn.cursor()\n\n# retrieve the data\nc.execute('''SELECT title, author, genre, sales\nFROM Books\nWHERE genre='Fantasy'\nORDER BY sales DESC\nLIMIT 10''')\n\n# save the results\nresults = 
c.fetchall()\n\n# generate the HTML code\nhtml = '<table>'\nhtml += '<tr><th>Title</th><th>Author</th><th>Genre</th><th>Sales</th></tr>'\n\nfor result in results:\n    title = result[0]\n    author = result[1]\n    genre = result[2]\n    sales = result[3]\n\n    html += f'<tr><td>{title}</td><td>{author}</td><td>{genre}</td><td>{sales}</td></tr>'\n\nhtml += '</table>
'\n\n# display the results\nprint(html)" + }, + { + "comment": "Create a Python script to filter out all the vowels from a string.", + "code_snippet": "def filter_vowels(string):\n vowels = ('a', 'e', 'i', 'o', 'u')\n return ''.join(c for c in string if c not in vowels)" + }, + { + "comment": "In Python, write a script to sort the list items in descending order.", + "code_snippet": "list_items = [3, 7, 5, 8, 10]\n\nlist_items.sort(reverse=True)\n\nprint(list_items) # Output: [10, 8, 7, 5, 3]" + }, + { + "comment": "Create a Python function that takes in a text string and returns a dictionary of words and the number of times each word appears in the text string.", + "code_snippet": "def words_count(text):\n words_dict = {}\n for word in text.split(' '):\n if word in words_dict:\n words_dict[word] += 1\n else:\n words_dict[word] = 1\n return words_dict\n\nwords_dict = words_count(\"A quick brown fox jumps over the lazy dog\")\nprint(words_dict)" + }, + { + "comment": "Create an object-oriented structure to solve a maze by using python.", + "code_snippet": "import collections\n\nclass Maze:\n def __init__(self, n, m):\n self._n, self._m = n, m\n self._grid = [[None] * m for _ in range(n)]\n\n def solve_maze(self):\n result, found = [], False\n start, end = (0, 0), (self._n-1, self._m-1)\n\n for i in range(self._n):\n for j in range(self._m):\n if self._grid[i][j] == 1:\n value = 0\n self._find_omega(i, j, value, result, start, end,\n found)\n return result\n\n def _find_omega(self, i, j, value,\n result, start, end, found):\n rowNbr, colNbr = [-1, 0, 0, 1], [0, -1, 1, 0]\n\n if (i, j) == end:\n found = True\n result.append(value)\n return\n\n if found == True:\n return\n\n self._grid[i][j] = 0\n for k in range(4):\n if self._on_board(i + rowNbr[k], j + colNbr[k]):\n self._find_omega(i + rowNbr[k], j + colNbr[k],\n value + 1, result,\n start, end, found)\n\n def _on_board(self, i, j):\n return 0 <= i < self._n and 0 <= j < self._m and self._grid[i][j] == 1" + }, + { + "comment": "Compose a piece of code to print the length of a given string.", + "code_snippet": "string = \"Python\"\nprint(\"Length of the given string is:\", len(string))" + }, + { + "comment": "Compose a Python program to calculate the maximum possible revenue from the items with given weights and values.", + "code_snippet": "\"\"\"\nCalculate the maximum possible revenue from the items with given weights and values\n\"\"\"\n\n# given weights and values \nweights = [2, 1, 3] \nvalues = [4, 2, 3] \n\n# number of items \nn = len(weights) \n \n# function to calculate the maximum revenue \ndef knapsack(weights, values, n): \n \n # total capacity of knapsack \n W = 5\n \n # matrix to store solution matrix \n K = [[0 for x in range(W+1)] for x in range(n+1)] \n \n # build the solution matrix in bottom up manner \n for i in range(n+1): \n for w in range(W+1): \n if i==0 or w==0: \n K[i][w] = 0\n elif weights[i-1] <= w: \n K[i][w] = max(values[i-1] + K[i-1][w-weights[i-1]], K[i-1][w]) \n else: \n K[i][w] = K[i-1][w] \n \n return K[n][W] \n \nprint (knapsack(weights, values, n))" + }, + { + "comment": "Write a Python program to read in a tweet and remove all punctuation, special characters and stopwords.", + "code_snippet": "import re\nimport string\nfrom nltk.corpus import stopwords\n\ntweet = \"I'm so excited to be going to the #MuseumTour! 
#BigDay #travel\"\ntweet_clean = re.sub(r'[^\\w\\s]','',tweet) #remove punctuation\ntokens = tweet_clean.split() #split string into individual words\nstop_words = set(stopwords.words(\"english\")) #get list of stopwords\nfiltered_tokens = [w for w in tokens if w not in stop_words] #remove stopwords\nprint(filtered_tokens)" + }, + { + "comment": "Implement a web scraper in Python to extract the top ten trending YouTube videos.", "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\n\n# make a request to get the page content\npage = requests.get(\"https://www.youtube.com/feed/trending\")\n\n# parse the page content\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# extract the list of trending videos\ntrending_videos = soup.find_all(class_='expanded-shelf-content-item-wrapper')\n\n# print the top ten trending videos\nfor video in trending_videos[:10]:\n    print(video.find(class_='yt-uix-tile-link').get('title'))" + }, + { + "comment": "Make a code in Python to find the mode of the given set of values.", "code_snippet": "def find_mode(values):\n    max_count = 0\n    mode = values[0]\n    count = {}\n    for item in values:\n        if item in count:\n            count[item] += 1\n        else:\n            count[item] = 1\n        if count[item] > max_count:\n            max_count = count[item]\n            mode = item\n    return mode" + }, + { + "comment": "Create a web API with Python and Flask to give the current date and time.", "code_snippet": "from flask import Flask\nimport datetime\n\napp = Flask(__name__)\n\n@app.route('/datetime')\ndef get_datetime():\n    return datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n\nif __name__ == '__main__':\n    app.run()" + }, + { + "comment": "Create a Python function that takes a list containing fruit names and returns the most common fruit name.", "code_snippet": "def most_common_fruit(fruit_list):\n\n    # Dictionary to get count of each fruit\n    freq_dict = {}\n\n    # Count frequency of each fruit\n    for fruit in fruit_list:\n        if fruit in freq_dict:\n            freq_dict[fruit] += 1\n        else:\n            freq_dict[fruit] = 1\n\n    # Get maximum frequency\n    max_freq = 0\n    for freq in freq_dict.values():\n        if freq > max_freq:\n            max_freq = freq\n\n    # Get the most common fruit\n    most_common = None\n    for fruit, freq in freq_dict.items():\n        if freq == max_freq:\n            most_common = fruit\n            break\n\n    return most_common" + }, + { + "comment": "Write a Python program to take in a list of phrases and generate a string with each phrase followed by its length.", "code_snippet": "input_phrases = [\"A bird in the hand\", \"is worth two in the bush\"]\n\nresult = \"\"\nfor phrase in input_phrases:\n    result += phrase + ' (' + str(len(phrase)) + ') '\n\nprint(result.strip())\n# Output: A bird in the hand (18) is worth two in the bush (24)" + }, + { + "comment": "Write a simple Python function that takes a list of integers and returns the sum of all elements divisible by 3.", "code_snippet": "def divisibleBy3(li):\n    total = 0\n    for num in li:\n        if num % 3 == 0:\n            total += num\n    return total\n# Output: 24" + }, + { + "comment": "Create a program in Python that takes a number from the user and prints all the divisors of the number.", "code_snippet": "divisors = []\n\nnum = int(input('Enter a number: '))\n\nfor i in range(1, num + 1):\n    if num % i == 0:\n        divisors.append(i)\n\nprint('The divisors of', num, 'are:')\nprint(*divisors, sep=', ')" + }, + { + "comment": "Write a Python program to generate an HTML page containing a table with some employee data.", "code_snippet": "<html>\n  <head>\n    <title>Employee Data</title>\n  </head>\n  <body>\n    <table>\n      <tr>
<th>Name</th><th>Job Title</th><th>Department</th><th>Salary</th></tr>\n      <tr><td>John Doe</td><td>Senior Developer</td><td>IT</td><td>$120,000</td></tr>\n      <tr><td>Jane Doe</td><td>Manager</td><td>Finance</td><td>$95,000</td></tr>\n      <tr><td>Tim Smith</td><td>Junior Developer</td><td>IT</td><td>$55,000</td></tr>\n    </table>
\n \n" + }, + { + "comment": "Create a Python function to select all rows from a table where the 'status' is 'approved'", + "code_snippet": "def select_approved_rows():\n sql = \"SELECT * FROM table WHERE status = 'approved'\"\n cur.execute(sql)\n return cur.fetchall()" + }, + { + "comment": "Create a Python program to generate random passwords of length 8 characters, containing at least one lowercase letter, one uppercase letter and one number.", + "code_snippet": "from random import choice\nimport string\n\ndef generate_password():\n chars = string.ascii_uppercase + string.ascii_lowercase + string.digits\n password = ''.join(choice(chars) for i in range(8))\n if any(c.islower() for c in password) and any(c.isupper() for c in password) and any(c.isdigit() for c in password):\n return password\n else: \n return generate_password()\n\nif __name__ == '__main__':\n print(generate_password())" + }, + { + "comment": "Generate a solution to translate a text from English to French using Python.", + "code_snippet": "# install the module \n# in command prompt\npip install googletrans\n\n# import module\nfrom googletrans import Translator\n\n# create an object\ntranslator = Translator()\n\n# define text\ntext = \"Hello, this is a sample text to translate.\"\n\n# call the function and pass the text\ntranslation = translator.translate(text, dest='fr') \n\n# print the translation \nprint(translation.text)" + }, + { + "comment": "Create a function to generate a unique 6 digit number.", + "code_snippet": "# Python program to generate \n# a unique 6 digit number \nimport random \ndef generate_number(): \n \n # Choose a random number \n # between 10000 and 99999 \n random_number = random.randint(10000, 99999) \n \n # Return the random number \n return random_number \n \n# Driver Code \nif __name__ == \"__main__\": \n print(generate_number())" + }, + { + "comment": "Write a Python program to generate 10 random numbers and put them in a list.", + "code_snippet": "import random\n\nrandom_list = []\nfor i in range(10):\n number = random.randint(1, 100)\n random_list.append(number)" + }, + { + "comment": "Write a Python program to replace all occurrences of a given string with an alternate string.", + "code_snippet": "def replace_string(input_string, target_string, replacement_string):\n return input_string.replace(target_string, replacement_string)\n\nprint(replace_string(\"Hello World!\", \"World\", \"Python\"))" + }, + { + "comment": "Generate a Python class for an online store item that includes fields for ID, name, price, and description.", + "code_snippet": "class StoreItem:\n def __init__(self, item_id, name, price, description):\n self.item_id = item_id\n self.name = name\n self.price = price\n self.description = description\n\n def __str__(self):\n return f'{self.name}, ${self.price}'\n\n def to_dict(self):\n return {\n 'item_id': self.item_id,\n 'name': self.name,\n 'price': self.price,\n 'description': self.description\n }" + }, + { + "comment": "Create a virtual assistant using Python and Dialogflow to answer a basic set of questions.", + "code_snippet": "import dialogflow_v2 as dialogflow\nimport requests\n\nproject_id = 'project-id'\nsession_client = dialogflow.SessionsClient()\nsession = session_client.session_path(project_id, 'session-id')\n\nwhile True:\n text = input('User: ')\n text_input = dialogflow.types.TextInput(text=text, language_code='en-US')\n query_input = dialogflow.types.QueryInput(text=text_input)\n response = session_client.detect_intent(session=session, 
query_input=query_input)\n\n    print('Assistant:', response.query_result.fulfillment_text)" + }, + { + "comment": "Create an algorithm in Python to solve \"Tower of Hanoi\" game of a 4 disk problem.", "code_snippet": "def solveTowerOfHanoi(num_disks, start_rod='A', end_rod='C', aux_rod='B'):\n    if num_disks == 1:\n        print(\"Move disk 1 from rod\", start_rod, \"to rod\", end_rod)\n        return\n    solveTowerOfHanoi(num_disks - 1, start_rod, aux_rod, end_rod)\n    print(\"Move disk\", num_disks, \"from rod\", start_rod, \"to rod\", end_rod)\n    solveTowerOfHanoi(num_disks - 1, aux_rod, end_rod, start_rod)\n\n\n# Driver code\nnum_disks = 4\nsolveTowerOfHanoi(num_disks)" + }, + { + "comment": "Generate an api response in Python for the provided data.", "code_snippet": "response = {\n    \"status\": 200,\n    \"data\": {\"name\": \"John\", \"age\": 28},\n    \"message\": \"Success\"\n}" + }, + { + "comment": "Create a webscraper using Python\u2019s Selenium that retrieves data from an online store.", "code_snippet": "from selenium import webdriver\n\n# create a new Firefox session\ndriver = webdriver.Firefox()\ndriver.implicitly_wait(30)\ndriver.maximize_window()\n\n# navigate to the application home page\ndriver.get(\"https://www.example.com/\")\n\n# get the search textbox\nsearch_field = driver.find_element_by_name(\"q\")\nsearch_field.clear()\n\n# enter search keyword and submit\nsearch_field.send_keys(\"shoes\")\nsearch_field.submit()\n\n# get the list of elements which are displayed after the search\n# currently on result page using find_elements_by_class_name\nproduct_list = driver.find_elements_by_class_name(\"product\")\n\n# iterate through each element and print the text that is\n# name of the product\nfor product in product_list:\n    print(product.text)\n\n# close the browser window\ndriver.quit()" + }, + { + "comment": "Develop a web scraper with Python BeautifulSoup to scrape data from an online web store.", "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\n\n# Make a GET request to fetch the raw HTML content\nhtml_content = requests.get('http://web-store.com').text\n\n# Parse the html content\nsoup = BeautifulSoup(html_content, \"lxml\")\n\n# Get the product data\nproducts = soup.find_all('div', attrs={\"itemprop\": \"product\"})\nfor product in products:\n    name = product.find(\"span\", attrs={\"itemprop\": \"name\"}).text\n    price = product.find(\"span\", attrs={\"itemprop\": \"price\"}).text\n    description = product.find(\"span\", attrs={\"itemprop\": \"description\"}).text\n    print(name, price, description)" + }, + { + "comment": "Implement a basic algorithm in Python to calculate the nth Fibonacci number.", "code_snippet": "def fibonacci(n):\n    if n == 0:\n        return 0\n    elif n == 1:\n        return 1\n    else:\n        return fibonacci(n - 1) + fibonacci(n - 2)\n\nprint(fibonacci(6))  # 8" + }, + { + "comment": "Create a Python class that receives two strings and has methods for comparing the strings and returning whether they are equal, different, or similar up to a provided threshold.", "code_snippet": "from nltk import edit_distance  # edit_distance was undefined in the original snippet; NLTK's Levenshtein helper is one option\n\nclass StringComparator:\n\n    def __init__(self, string1, string2):\n        self.string1 = string1\n        self.string2 = string2\n\n    def is_equal(self):\n        return self.string1 == self.string2\n\n    def is_different(self):\n        return self.string1 != self.string2\n\n    def is_similar(self, threshold=0.5):\n        distance = edit_distance(self.string1, self.string2)\n        max_length = max(len(self.string1), len(self.string2))\n\n        return distance / max_length < threshold" + }, + { + "comment": "Generate a program in 
Python to evaluate the accuracy of a machine-learning model.", "code_snippet": "from sklearn.metrics import accuracy_score\n\n# assumes a classifier clf and the train/test splits are already defined\nclf.fit(X_train, y_train)\ny_pred = clf.predict(X_test)\n\n# Calculating the accuracy\naccuracy = accuracy_score(y_test, y_pred)\n\nprint(\"Accuracy: \" + str(accuracy))" + }, + { + "comment": "Write a Python program to replace all occurrences of a character in a string with the provided replacement character.", "code_snippet": "def replace_all_occurrences(s, char, replacement):\n    return s.replace(char, replacement)\n\nprint(replace_all_occurrences(\"Hello World!\", 'l', 'X'))" + }, + { + "comment": "Create a python program to find the largest prime factor of a number.", "code_snippet": "import math\n\ndef largest_prime_factor(n):\n\n    # Collect the divisors of the number; include n itself so that\n    # prime inputs (such as 331) are handled\n    factors = [n]\n    for i in range(2, int(math.sqrt(n)) + 1):\n        if n % i == 0:\n            factors.append(i)\n            factors.append(int(n / i))\n\n    # Keep only the prime factors\n    prime_factors = []\n    for potential_factor in factors:\n        is_prime = True\n        for j in range(2, int(math.sqrt(potential_factor)) + 1):\n            if potential_factor % j == 0:\n                is_prime = False\n        if is_prime:\n            prime_factors.append(potential_factor)\n\n    # Return the largest prime factor\n    return max(prime_factors)\n\nprint(largest_prime_factor(331))" + }, + { + "comment": "Write a Python program to check if an element exists within a given list of integers.", "code_snippet": "def element_exists(elem, list_int):\n    return elem in list_int" + }, + { + "comment": "Create a Python program that takes a sentence and converts it to Morse code.", "code_snippet": "MORSE_CODE = {\n    'A': '.-', 'B': '-...',\n    'C': '-.-.', 'D': '-..',\n    'E': '.', 'F': '..-.',\n    'G': '--.', 'H': '....',\n    'I': '..', 'J': '.---',\n    'K': '-.-', 'L': '.-..',\n    'M': '--', 'N': '-.',\n    'O': '---', 'P': '.--.',\n    'Q': '--.-', 'R': '.-.',\n    'S': '...', 'T': '-',\n    'U': '..-', 'V': '...-',\n    'W': '.--', 'X': '-..-',\n    'Y': '-.--', 'Z': '--..',\n    '0': '-----', '1': '.----',\n    '2': '..---', '3': '...--',\n    '4': '....-', '5': '.....',\n    '6': '-....', '7': '--...',\n    '8': '---..', '9': '----.',\n    ' ': ' ', '.': '.-.-.-',\n    ',': '--..--', '?': '..--..',\n    '/': '-..-.', '-': '-....-',\n    '(': '-.--.', ')': '-.--.-'\n}\n\ndef encode(message):\n    code = ''\n    for letter in message:\n        code += MORSE_CODE[letter.upper()] + ' '\n\n    return code\n\nprint(encode(\"Hello world\"))\n# Output: .... . .-.. .-.. --- .-- --- .-. .-.. -.."
+ }, + { + "comment": "Generate a python program to sort a list of numbers", + "code_snippet": "def sort_list(array): \n n = len(array) \n \n for i in range(n): \n \n min_idx = i \n for j in range(i+1, n): \n if array[min_idx] > array[j]: \n min_idx = j \n array[i], array[min_idx] = array[min_idx], array[i]\n\nif __name__ == \"__main__\":\n array = [4, 2, 6, 7, 1]\n sort_list(array)\n print(array)" + }, + { + "comment": "Create a program in Python to remove all the occurrences of an item from a list.", + "code_snippet": "def removeItem(list, item):\n # loop through the list\n for i in list:\n # check if the element is the item to remove\n if i == item:\n # remove the item\n list.remove(i)\n # return the list\n return list\n\n#example\nlist = [1, 2, 3, 4, 2, 5, 2]\nresult = removeItem(list, 2)\n\nprint(result)\n# Output: [1, 3, 4, 5]" + }, + { + "comment": "Could you build a web scraper in Python that collects all the links from a website?", + "code_snippet": "# Python program to scrape a webpage\nimport requests as req \nfrom bs4 import BeautifulSoup \n \nURL = \"http://www.example.com/\"\n \n# GET request \nr = req.get(URL) \n \n# parse the downloaded homepage and grab all text\nsoup = BeautifulSoup(r.text, \"html.parser\") \n \n# find and print all links \nlinks = soup.find_all('a') \nfor link in links: \n print(link.get('href'))" + }, + { + "comment": "Generate a Python program to delete a given key from a dictionary.", + "code_snippet": "dictionary = {'foo': 1, 'bar': 2, 'baz': 3}\nkey = 'baz'\n\ndef delete_key(dic, k):\n if k in dic:\n del dic[k]\n\ndelete_key(dictionary, key)\n\nprint(dictionary)" + }, + { + "comment": "Analyze the following Python function to detect any potential errors.", + "code_snippet": "The function is attempting to modify the list while iterating over it. This can lead to unexpected behavior and should be avoided. 
To fix the code, the list should be copied before the iteration takes place: \n\ndef expand_list(list):\n list_copy = list[:] # make a copy of the list\n for item in list_copy:\n list *= 2" + }, + { + "comment": "Generate a Python program to reverse the order of words in a sentence but keep the order of the characters in each word the same.", + "code_snippet": "def reverse_words(string):\n words = string.split(\" \")\n reversed_words = []\n for word in words:\n reversed_words.insert(0, word)\n return ' '.join(reversed_words)\n\nstring = 'This is a sentence.'\nreverse_words = reverse_words(string)\nprint(reverse_words)" + }, + { + "comment": "Port a Java code snippet to Python 3.", + "code_snippet": "def foo(a, b):\n return a + b" + }, + { + "comment": "Generate a Python program that takes a list of strings as input and prints the strings in alphabetical order.", + "code_snippet": "strings = ['cat', 'dog', 'apple', 'banana']\n\nstrings.sort()\n\nfor string in strings:\n print(string)" + }, + { + "comment": "Build a Python program that extracts all email addresses from a given string.", + "code_snippet": "import re\n\ndef find_emails(text):\n regex = r\"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+)\"\n emails = re.findall(regex, text)\n return emails\n\nemails = find_emails(\"This is an example with two emails: john@example.com, and jane@example.com\")\nprint(emails)" + }, + { + "comment": "Create a Python code to generate a random string of length 8 made of uppercase letters and numbers.", + "code_snippet": "import string\nimport random\n\nlength = 8\nchars = string.ascii_uppercase + string.digits\npassword = ''.join(random.choice(chars) for i in range(length))\n\nprint (password)" + }, + { + "comment": "This code calculates the percent of transcripts mapping to rRNA. To use it, you need to supply two inputs: an HTseq_counts file and an original reference transcriptome aligned to. To produce tables with rRNA and MT counts for each sample, run the command \"./rRNA_MT_count.py --gtf /path/to/GTF/file --htseq /path/to/HTseq_counts/file\". The script will extract all the identifiers of genes/transcripts mapping to the rRNA and MT genes and store them in two arrays. Then, it will count reads matching the rRNA identifiers and write the output files. The summary output file will be named \"HTseq_counts_file_rRNAmtRNACounts.txt\".", + "code_snippet": "#!/usr/bin/python\n\n# Malwina Prater, mn367@cam.ac.uk, 2017, Copyright\n# Centre for Trophoblast Research, University of Cambridge\n#\n# Script version: v01.\n#\n# Script to calculate the percent of transcripts mapping to rRNA\n#\n# INPUTS :\n# 1. HTseq_counts file\n# 2. 
Original reference transcriptome alignned to\n#\n# USAGE : \n# For producing table(s) with rRNA and MT counts for each sample use commands like that:\n#\n# ./rRNA_MT_count.py --gtf /Users/malwina/Documents/CTR-Data/genomes/Mus_musculus/mm10/Mus_musculus.GRCm38.84.gtf --htseq C17_3_S20_Aligned.out.srt.bam_htseq_combined_counts.txt\n#\n\n\n# import modules: \nimport os,sys\nfrom optparse import OptionParser\nimport re\n\n# parse in the user options:\n\nparser = OptionParser(usage=\"%prog [-x Excel [-i imagefile] [-s squares]\",\n version=\"%prog 0.1\")\n\nparser.add_option(\"--htseq\", dest=\"FileName\", type=\"string\", action=\"store\")\nparser.add_option(\"--gtf\", dest=\"GTF\", type=\"string\", action=\"store\")\n\n(options, args) = parser.parse_args()\n\n\n#files = sys.argv[]\nHTSEQ_COUNTS = options.FileName\nGTF = options.GTF\n\n\n# check if files supplied exist:\ntry:\n handle = open(GTF, \"rU\")\n handle.close()\nexcept:\n print \"\\nError->\\tGTF File: %s does not exist\\n\" % GTF\n sys.exit()\n\ntry:\n handle = open(HTSEQ_COUNTS, \"rU\")\n handle.close()\nexcept:\n print \"\\nError->\\tFile: %s does not exist\\n\" % HTSEQ_COUNTS\n sys.exit()\n\n\n#\n# First job is to extract all the identifiers of genes/transcripts mapping to the rRNA and MT genes and store in 2 arrays\n#\n\nrRNA_identifiers = {}\nMT_identifiers = {}\n\n\nwith open(GTF, \"rU\") as handle:\n #line = handle.readline()\n for line in handle:\n \tline.rstrip('\\n')\n \tif 'gene_biotype \"rRNA\"' in line:\n identifier = line\n identifier = re.sub('.*gene_id \"', '', identifier)\n identifier = re.sub('\"; gene_version.*\\n', '', identifier)\n rRNA_identifiers[identifier] = 1\n if 'MT' in line:\n identifier = line\n identifier = re.sub('.*gene_id \"', '', identifier)\n identifier = re.sub('\"; gene_version.*\\n', '', identifier)\n MT_identifiers[identifier] = 1 \nhandle.close() \n\n#print(\"rRNA:\")\n#print(rRNA_identifiers.keys())\n#print(\"MT:\")\n#print(MT_identifiers.keys())\n\n\n#\n# Second job is to go through the HTSEQ-couts and count reads matching the rRNA identifiers\n#\nCummulative_rRNA_Count = 0\nrRNA_genes = 0\nReadCount = 0\nline_number = 0\nMT_genes = 0;\nCummulative_MT_Count = 0;\n\nwith open(HTSEQ_COUNTS, \"rU\") as handle:\n for line in handle:\n \tline.rstrip('\\n') \n\n split_line = line.split(\"\\t\")\n if line_number > 0:\n \t if split_line[0] in rRNA_identifiers.keys(): # if rRNA_identifiers[gene_id]\n rRNA_genes += 1\n Cummulative_rRNA_Count += int(split_line[1])\n if split_line[0] in MT_identifiers.keys():\n MT_genes += 1\n Cummulative_MT_Count += int(split_line[1])\n ReadCount += int(split_line[1])\n line_number += 1\nhandle.close() \n#print(Cummulative_MT_Count)\n#print(Cummulative_rRNA_Count)\n\n\n#\n# wiritng the output files:\n# \nout = HTSEQ_COUNTS + '_rRNAmtRNACounts.txt'; \nout = re.sub('.txt_', '_', out)\n\nprint \"Summary output file:\t\t\", out, \"\\n\"\n\nOUT = open(out, \"w\")\nOUT.write('HT-SEQ file name: \\t' + HTSEQ_COUNTS + '\\n\\n')\nOUT.write('GTF file name: \\t\\t' + GTF + '\\n\\n\\n')\nOUT.write('---------------------------------------------------------------------------------' + '\\n')\nOUT.write(' rRNA and MT identifiers\\n')\nOUT.write('---------------------------------------------------------------------------------' + '\\n')\nOUT.write('No. of rRNA identifiers: ' + str(len(rRNA_identifiers.keys())) + '\\n') # PRINT size of this hash\nOUT.write('No. 
of MT identifiers: ' + str(len(MT_identifiers.keys())) + '\\n') # PRINT size of this hash\nOUT.write('\\n\\n')\nOUT.write('---------------------------------------------------------------------------------' + '\\n')\nOUT.write(' HTSEQ mapping summary\\n')\nOUT.write('---------------------------------------------------------------------------------' + '\\n')\nOUT.write('ReadCount: ' + str(ReadCount) + '\\n\\n')\n#OUT.write(' Number of rRNA genes: ' + str(rRNA_genes) + '\\n')\nOUT.write('Total no. of rRNA transcripts: ' + str(Cummulative_rRNA_Count) + '\\n')\nperc_rRNA = 100*float(Cummulative_rRNA_Count)/float(ReadCount)\nperc_rRNA = str(round(perc_rRNA, 3))\nOUT.write('Percent rRNA mapped reads: ' + str(Cummulative_rRNA_Count) + ' / ' + str(ReadCount) + ' * 100 = ' + perc_rRNA + '%\\n\\n')\n#OUT.write('\\n Number of MT genes: ' + str(MT_genes) + '\\n')\nOUT.write('Total no. of MT transcripts: ' + str(Cummulative_MT_Count) + '\\n')\nperc_MT = 100*float(Cummulative_MT_Count)/float(ReadCount)\nperc_MT = str(round(perc_MT, 3))\nOUT.write('Percent MT mapped reads: ' + str(Cummulative_MT_Count) + ' / ' + str(ReadCount) + ' * 100 = ' + perc_MT + '%\\n\\n')\nOUT.close()\n\n\n" + }, + { + "comment": "Modify the following Python code to raise an exception if the input value is not an integer", "code_snippet": "def square_root(x):\n    if not isinstance(x, int):\n        raise TypeError('Input must be an integer')\n    return x**0.5" + }, + { + "comment": "Create a program in Python that extracts titles of all YouTube videos from a URL.", "code_snippet": "import requests\nimport re\n\nurl = \"https://www.youtube.com/user/Thexvid/videos\"\n\nr = requests.get(url)\n# capture (href, title) pairs from the page's anchor tags; the opening tag of the\n# original pattern was truncated, so this regex is a reconstruction\ntitles = re.findall(r'<a href=\"(.+?)\".*?title=\"(.+?)\">', r.content.decode('utf-8'))\n\nfor title in titles:\n    print(title[1])" + }, + { + "comment": "Write Python code to list the contents of a directory.", "code_snippet": "# get the list of contents\nimport os\ndirectory_contents = os.listdir()\n\n# print the contents\nfor content in directory_contents:\n    print(content)" + }, + { + "comment": "Create an AI-powered Python script to optimize any given code.", "code_snippet": "# Import the required libraries\nfrom dataclasses import dataclass\n\n@dataclass\nclass Params:\n    a: int\n    b: int\n    c: int\n\ndef get_result(params: Params) -> int:\n    \"\"\"Returns the sum of the three given numbers\"\"\"\n    return params.a + params.b + params.c\n\n# Optimize the code\ndef optimize(fn, params):\n    best_score = 0\n    best_params = None\n    for a in range(params.a - 10, params.a + 10):\n        for b in range(params.b - 10, params.b + 10):\n            for c in range(params.c - 10, params.c + 10):\n                score = fn(Params(a, b, c))\n                if score > best_score:\n                    best_score = score\n                    best_params = {\n                        'a': a,\n                        'b': b,\n                        'c': c\n                    }\n    return best_params\n\nif __name__ == '__main__':\n    # Initialize the parameters\n    params = Params(a=1, b=2, c=3)\n    # Get the optimized params\n    best_params = optimize(get_result, params)\n    # Print the optimized result\n    print(\"Optimized result: %d\" % get_result(Params(best_params['a'], best_params['b'], best_params['c'])))" + }, + { + "comment": "Create a Python function that takes two strings and returns true if the first string is a subsequence of the second.", "code_snippet": "def is_subsequence(str1, str2):\n    i = 0\n    j = 0\n\n    while i < len(str1) and j < len(str2):\n        if str1[i] == str2[j]:\n            i += 1\n        j += 1\n\n    return i == len(str1)" + }, + { + "comment": "Create a neural network class in Python to model a linear regression function.", "code_snippet": "import numpy as np\nclass 
LinearRegressionModel:\n    def __init__(self, input_dim, output_dim):\n        ''' Initialises the weights and bias of the linear regression model\n        Arguments:\n            input_dim {int} -- Number of input features\n            output_dim {int} -- Number of outputs\n        '''\n        self.weights = np.zeros((input_dim, output_dim))\n        self.bias = np.zeros((1, output_dim))\n\n    def forward(self, x):\n        ''' Calculates the forward pass of the linear regression model\n        Arguments:\n            x {numpy array} -- Input data of size (N, input_dim)\n        Returns:\n            out {numpy array} -- Output of the linear regression model of size (N, output_dim)\n        '''\n        out = np.dot(x, self.weights) + self.bias\n        return out" + }, + { + "comment": "Develop a ML model in Python for predicting the winning team of football matches. Use data from the past 5 years of the Premier League.", "code_snippet": "import pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import GradientBoostingClassifier\n\n# Load data\ndata = pd.read_csv('data.csv')\n\n# Select features\nfeatures = data[['home_team_goals', 'away_team_goals', 'home_team_possession', 'away_team_possession',\n                 'home_team_shots', 'away_team_shots', 'home_team_yellow_cards', 'away_team_yellow_cards',\n                 'home_team_red_cards', 'away_team_red_cards']]\n\n# Select target\ntarget = data['winner']\n\n# Split data into training and test sets\nX_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)\n\n# Train the model\ngbm = GradientBoostingClassifier()\ngbm.fit(X_train, y_train)\n\n# Make predictions\ny_pred = gbm.predict(X_test)\n\n# Evaluate the model\nscore = gbm.score(X_test, y_test)\nprint(f'Model score: {score}')" + }, + { + "comment": "Create a program in Python that evaluates a postfix expression. The input to the program should be a string with the postfix expression. The output should be the evaluated value of the expression.", "code_snippet": "def evaluate(expression):\n\n    # split expression into list\n    expression = expression.split()\n\n    # stack to store integer values\n    stack = []\n\n    # do for every element of expression\n    for element in expression:\n\n        # if element is an operand (a number),\n        # push it onto the stack\n        if element.isdigit():\n            stack.append(element)\n\n        # if element is an operator,\n        # pop two elements from the stack and\n        # perform the respective operation\n        else:\n            val1 = stack.pop()\n            val2 = stack.pop()\n            stack.append(str(eval(val2 + element + val1)))\n\n    # return the value\n    return int(stack[0])\n\n# Driver Code\nexpression = \"2 3 + 4 5 * *\"\n\nprint(evaluate(expression))\n# Output: 100" + }, + { + "comment": "Write a Python script to identify the words that contain more than one vowel in a given sentence.", "code_snippet": "import re\n\nsentence = \"The quick brown fox jumped over the lazy dog.\"\n\n# a word qualifies if it contains two or more vowels\nwords = re.findall(r'\\b\\w+\\b', sentence)\nmatches = [word for word in words if len(re.findall(r'[aeiou]', word)) > 1]\nprint(matches)\n\n# Output: ['quick', 'jumped', 'over']" + }, + { + "comment": "This code provides a Python API for KB SRU. To use it, you can create an instance of the \"sru\" class and call its \"search\" method, passing in a query string and optionally a collection name, start record number, maximum number of records to return, and record schema. The method will return a \"response\" object containing the results of the search. 
You can then use the various properties of the \"response\" object to extract specific information from the search results, such as titles, creators, and subjects.", + "code_snippet": "#! /usr/bin/env python\n\"\"\"\nPython API for KB SRU\n\"\"\"\n\nimport sys\nimport urllib\nimport requests\nfrom lxml import etree\n\nSRU_BASEURL = 'http://jsru.kb.nl/sru/sru'\nSRU_BASEURL += '?version=1.2&maximumRecords=%i'\nSRU_BASEURL += '&operation=searchRetrieve'\nSRU_BASEURL += '&startRecord=%i'\nSRU_BASEURL += '&recordSchema=%s'\nSRU_BASEURL += '&x-collection=%s&query=%s'\n\nSETS = {'ANP': {'collection': 'ANP',\n 'description_en': 'Radio Bulletins ANP Press Agency',\n 'description_nl': 'ANP Radiobulletins Digitaal',\n 'metadataPrefix': 'didl',\n 'recordschema': 'dcx',\n 'setname': 'anp',\n 'time_period': [1937, 1989]},\n 'DPO': {'collection': 'DPO_boekdeel',\n 'description_en': 'Early Dutch Books Online',\n 'description_nl': 'Early Dutch Books Online',\n 'metadataPrefix': 'didl',\n 'recordschema': 'ddd',\n 'setname': 'DPO',\n 'time_period': [1781, 1800]},\n 'BYVANCK': {'description_en': 'Medieval Illuminated Manuscripts',\n 'description_nl': 'Middeleeuwse Verluchte Handschriften',\n 'metadataPrefix': 'dcx',\n 'setname': 'BYVANCK',\n 'time_period': [500, 1500]},\n 'SGD': {'description_en': 'States General Digital',\n 'description_nl': 'Staten-Generaal Digitaal',\n 'metadataPrefix': 'dcx',\n 'setname': 'sgd:register',\n 'time_period': [1962, 1994]},\n 'GGC': {'collection': 'GGC',\n 'description_en': 'General Catalogue KB',\n 'description_nl': 'Algemene Catalogus KB',\n 'metadataPrefix': 'dcx',\n 'recordschema': 'dcx',\n 'setname': 'ggc',\n 'time_period': [1937, 2021]}} # No idea what to use here?\n\n# Name spaces in GGC records\n\nsrw_ns = 'http://www.loc.gov/zing/srw/'\ntel_ns = 'http://krait.kb.nl/coop/tel/handbook/telterms.html'\nxsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'\ndc_ns = 'http://purl.org/dc/elements/1.1/'\ndcterms_ns = 'http://purl.org/dc/terms/'\ndcx_ns = 'http://krait.kb.nl/coop/tel/handbook/telterms.html'\n\nNSMAPGGC = {\"srw\": srw_ns,\n \"tel\": tel_ns,\n \"xsi\": xsi_ns,\n \"dc\": dc_ns,\n \"dcterms\": dcterms_ns,\n \"dcx\": dcx_ns}\n\n\nclass response():\n def __init__(self, record_data, sru):\n self.record_data = record_data\n self.sru = sru\n\n def getElementText(self, tagName, attributeName, attributeValue):\n # Returns text content of all elements for which tag matches tagName,\n # and attribute value equals attributeValue. 
Set attributeName to empty\n # string to get all tagName matches.\n textFields = []\n for r in self.record_data.iter():\n if r.tag == tagName:\n if attributeName != '':\n try:\n if r.attrib[attributeName] == attributeValue:\n textFields.append(r.text)\n except KeyError:\n pass\n else:\n textFields.append(r.text)\n return textFields\n\n @property\n def records(self):\n if self.sru.nr_of_records == 0:\n record_data = \"\"\n else:\n ns = {'zs': 'http://www.loc.gov/zing/srw/'}\n record_data = self.record_data.xpath(\"zs:records/zs:record\",\n namespaces=ns)[0]\n return record(record_data, self.sru)\n\n # Below property functions all return a list with all instances that satisfy\n # criteria\n\n @property\n def typesDutch(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}type',\n '{http://www.w3.org/XML/1998/namespace}lang',\n 'nl'))\n\n @property\n def typesDCMI(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}type',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'DCMIType'))\n\n @property\n def identifiersISBN(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcterms:ISBN'))\n\n @property\n def identifiersBrinkman(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcx:Brinkman'))\n\n @property\n def identifiersURI(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcterms:URI'))\n\n @property\n def identifiersOCLC(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}identifier',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'OCLC'))\n\n @property\n def languagesDutch(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}language',\n '{http://www.w3.org/XML/1998/namespace}lang',\n 'nl'))\n\n @property\n def languagesEnglish(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}language',\n '{http://www.w3.org/XML/1998/namespace}lang',\n 'en'))\n\n @property\n def languagesFrench(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}language',\n '{http://www.w3.org/XML/1998/namespace}lang',\n 'fr'))\n\n @property\n def languagesISO639(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}language',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcterms:ISO639-2'))\n\n @property\n def dates(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}date',\n '',\n ''))\n\n @property\n def extents(self):\n return(self.getElementText('{http://purl.org/dc/terms/}extent',\n '',\n ''))\n\n @property\n def creators(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}creator',\n '',\n ''))\n\n @property\n def contributors(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}contributor',\n '',\n ''))\n\n @property\n def titles(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}title',\n '',\n ''))\n\n @property\n def titlesMain(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}title',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcx:maintitle'))\n\n @property\n def titlesIntermediate(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}title',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcx:intermediatetitle'))\n\n @property\n def publishers(self):\n 
return(self.getElementText('{http://purl.org/dc/elements/1.1/}publisher',\n '',\n ''))\n\n @property\n def countries(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}country',\n '',\n ''))\n\n @property\n def subjectsBrinkman(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}subject',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcx:Brinkman'))\n\n @property\n def subjectsISO9707(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}subject',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'ISO_9707_[Brinkman]'))\n\n @property\n def subjectsUNESCO(self):\n return(self.getElementText('{http://purl.org/dc/elements/1.1/}subject',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'UNESCO'))\n\n @property\n def collectionIdentifiers(self):\n return(self.getElementText('{http://purl.org/dc/terms/}isPartOf',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcx:collectionIdentifier'))\n\n @property\n def recordIdentifiersURI(self):\n return(self.getElementText('{http://krait.kb.nl/coop/tel/handbook/telterms.html}recordIdentifier',\n '{http://www.w3.org/2001/XMLSchema-instance}type',\n 'dcterms:URI'))\n\n @property\n def annotations(self):\n # Note that annotations sometimes contain language or itenID attibutes;\n # ignored for now (collect everything).\n return(self.getElementText('{http://krait.kb.nl/coop/tel/handbook/telterms.html}annotation',\n '',\n ''))\n\n\nclass record():\n def __init__(self, record_data, sru):\n self.record_data = record_data\n self.sru = sru\n\n def __iter__(self):\n return self\n\n # This works under Python 2.7\n def next(self):\n if self.sru.nr_of_records == 0:\n raise StopIteration\n if self.sru.startrecord < self.sru.nr_of_records + 1:\n record_data = self.sru.run_query()\n self.sru.startrecord += 1\n return response(record_data, self.sru)\n else:\n raise StopIteration\n\n # This works under Python 3\n def __next__(self):\n if self.sru.nr_of_records == 0:\n raise StopIteration\n if self.sru.startrecord < self.sru.nr_of_records + 1:\n record_data = self.sru.run_query()\n self.sru.startrecord += 1\n return response(record_data, self.sru)\n else:\n raise StopIteration\n\n\nclass sru():\n DEBUG = False\n\n collection = False\n maximumrecords = 50\n nr_of_records = 0\n query = \"\"\n recordschema = False\n sru_collections = SETS\n startrecord = 0\n\n def search(self, query, collection=False,\n startrecord=1, maximumrecords=1, recordschema=False):\n\n self.maximumrecords = maximumrecords\n if sys.version.startswith('3'):\n self.query = urllib.parse.quote_plus(query)\n elif sys.version.startswith('2'):\n self.query = urllib.quote_plus(query)\n self.startrecord = startrecord\n\n if collection not in self.sru_collections:\n raise Exception('Unknown collection')\n\n self.collection = self.sru_collections[collection]['collection']\n\n if not self.collection:\n raise Exception('Error, no collection specified')\n\n if not recordschema:\n self.recordschema = self.sru_collections[collection]['recordschema']\n else:\n self.recordschema = recordschema\n\n record_data = self.run_query()\n\n nr_of_records = [i.text for i in record_data.iter() if\n i.tag.endswith('numberOfRecords')][0]\n\n self.nr_of_records = int(nr_of_records)\n\n if self.nr_of_records > 0:\n return response(record_data, self)\n\n return False\n\n def run_query(self):\n url = SRU_BASEURL % (self.maximumrecords, self.startrecord,\n self.recordschema, self.collection, self.query)\n if self.DEBUG:\n sys.stdout.write(url)\n\n r = 
requests.get(url)\n\n if not r.status_code == 200:\n raise Exception('Error while getting data from %s' % url)\n\n record_data = etree.fromstring(r.content)\n\n return record_data\n" + }, + { + "comment": "Using numpy, create a Python code which would return an array with the running total of numbers in a given list.", + "code_snippet": "import numpy as np\n\ndef runningTotal(arr):\n return np.cumsum(arr)" + }, + { + "comment": "Construct a neural network in Python that can perform basic regression.", + "code_snippet": "import numpy as np\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom keras.models import Sequential\nfrom keras.layers import Dense\n \n# Load the data\ndata = pd.read_csv('data.csv')\n \n# Split the data into features and labels\nX = data.drop(['label'], axis=1).values\ny = data['label'].values\n \n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n \n# Create the neural network\nmodel = Sequential()\nmodel.add(Dense(12, input_dim=X.shape[1], kernel_initializer='normal', activation='relu'))\nmodel.add(Dense(12, kernel_initializer='normal', activation='relu'))\nmodel.add(Dense(1, kernel_initializer='normal'))\n \n# Compile the model\nmodel.compile(loss='mean_squared_error', optimizer='adam')\n \n# Train the model\nmodel.fit(X_train, y_train, epochs=100, batch_size=20, verbose=0)\n \n# Evaluate the model\nscore = model.evaluate(X_test, y_test, verbose=0)\nprint('Test score:', score)" + }, + { + "comment": "Generate a program in Python to take in two strings as input, and output a list of all thewords in both strings.", + "code_snippet": "def string_to_list(s1, s2):\n s1_list = s1.split()\n s2_list = s2.split()\n output_list = s1_list + s2_list\n return output_list\n\nwords = string_to_list(\"I want to make a difference\", \"The journey to success is long\")\nprint(words)" + }, + { + "comment": "Create a web scraper using Python that retrieves the 10 most recent articles from a news website.", + "code_snippet": "import requests \nfrom bs4 import BeautifulSoup \n \n# URL to scrap \nURL = 'http://example.com/news'\n \n# get the source code \nhtml_content = requests.get(URL).text \n \n# parse the html content \nsoup = BeautifulSoup(html_content, \"lxml\") \n \n# get all the articles \narticles = soup.find_all('article') \n \n# print the first 10 articles \nfor article in articles[:10]: \n print(article.text)" + }, + { + "comment": "Create a Python program to convert HTML to plain text without using any libraries.", + "code_snippet": "def html_to_text(html_string):\n output = ''\n in_tag = False\n for char in html_string:\n if char == '<':\n in_tag = True\n elif char == '>':\n in_tag = False\n elif in_tag == False:\n output += char\n return output" + }, + { + "comment": "Write a python program to convert the given temperature from Celsius to Fahrenheit.", + "code_snippet": "\"\"\"\nConvert the given temperature from Celsius to Fahrenheit\n\"\"\"\n\ndef celsius_to_fahrenheit(celsius):\n fahrenheit = (celsius * 9/5) + 32\n return fahrenheit\n\nif __name__ == '__main__':\n celsius = 0\n print(celsius_to_fahrenheit(celsius))" + }, + { + "comment": "Create a lambda expression in Python to filter a list of integer greater than 50.", + "code_snippet": "list1 = [45, 12, 52, 89, 33, 99]\nfiltered_list = list(filter(lambda x: x > 50, list1))\n\nprint(filtered_list)" + }, + { + "comment": "Create a Python program to evaluate and compare the performance of two different 
heuristics algorithms.", + "code_snippet": "import time\nimport heuristics_algorithm1\nimport heuristics_algorithm2\n\n#input data for algorithms\ndata1 = ...\ndata2 = ...\n\n#Evaluate and compare the performance of two different heuristics algorithms\nstart = time.time()\nresult1 = heuristics_algorithm1.run(data1)\nend = time.time()\ntime1 = end - start\n\nstart = time.time()\nresult2 = heuristics_algorithm2.run(data2)\nend = time.time()\ntime2 = end - start\n\nif time1 < time2:\n print(\"Algorithm 1 is faster than algorithm 2\")\nelif time2 < time1:\n print(\"Algorithm 2 is faster than algorithm 1\")\nelse:\n print(\"Both algorithms have the same performance time\")" + }, + { + "comment": "Generate a function in Python that takes in a dictionary (string \u2192 integer) and returns the sum of all values in the dictionary.", + "code_snippet": "def compute_dict_sum(input_dict):\n result = 0\n for val in input_dict.values():\n result += val\n return result\n\nprint(compute_dict_sum(my_dict))" + }, + { + "comment": "Write a Python script that filters a list of integers and returns only even numbers.", + "code_snippet": "def filter_even_numbers(lst):\n # Filter out even numbers from the given list \n even_numbers = [num for num in lst if num % 2 == 0]\n return even_numbers\n\nlist = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]\nresult = filter_even_numbers(list)\nprint(result)" + }, + { + "comment": "Generate a Python program that takes a sentence as input and prints out only the verbs.", + "code_snippet": "import nltk\n\ndef extract_verbs(sentence):\n tokens = nltk.word_tokenize(sentence)\n tags = nltk.pos_tag(tokens)\n verbs = [word for word, pos in tags if pos.startswith('V')]\n return verbs\n\nsentence = \"The cat jumped over the moon.\"\nverbs = extract_verbs(sentence)\nprint(verbs)" + }, + { + "comment": "Create an AI-based system in Python for predicting stock market movements.", + "code_snippet": "#import libraries\nimport pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n\n#import data\ndata = pd.read_csv('data.csv') \n\n#split data into train and test data\nX_train=data.iloc[:, 0] #Choosing the independent variable \ny_train=data.iloc[:, 1] #Choosing the dependent variable\nX_test=data.iloc[:, 0] #Choosing the independent variable \ny_test=data.iloc[:, 1] #Choosing the dependent variable\n\n#Build a deep learning model\n#import necessary libraries\nimport tensorflow as tf\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.models import Sequential\n\n#create a model and add layers\nmodel = Sequential()\nmodel.add(Dense(64, input_shape=(1,), activation='relu'))\nmodel.add(Dense(128, activation='relu'))\nmodel.add(Dense(64, activation='relu'))\nmodel.add(Dense(1))\n\n#compile the model\nmodel.compile(optimizer='adam', loss='mse', metrics=['mse'])\n\n#train the model\nmodel.fit(X_train, y_train, batch_size=64, epochs=20, validation_data=(X_test, y_test))\n\n#Evaluate the model\nscore = model.evaluate(X_test, y_test, verbose=0)" + }, + { + "comment": "Develop an API in Python that fetches data from an SQL database and presents the data in the form of a JSON.", + "code_snippet": "import json \nimport sqlite3\nfrom flask import Flask, request, jsonify \n\napp = Flask(__name__) \n \ndef dict_factory(cursor, row):\n d = {}\n for idx, col in enumerate(cursor.description):\n d[col[0]] = row[idx]\n return d \n \n@app.route('/', methods=['GET'])\ndef index(): \n connection = sqlite3.connect('database.db')\n connection.row_factory = dict_factory\n cur = 
connection.cursor()\n    all_users = cur.execute('SELECT * FROM users;').fetchall()\n    return jsonify(all_users)\n\nif __name__ == '__main__':\n    app.run(debug=True)" + }, + { + "comment": "Generate a Python dictionary that contains the count of the characters in a given string.", "code_snippet": "def count_chars(string):\n    count_dict = {}\n\n    for char in string:\n        if char in count_dict:\n            count_dict[char] += 1\n        else:\n            count_dict[char] = 1\n\n    return count_dict\n\nprint(count_chars(\"Hello World\"))\n\n# Output: {'H': 1, 'e': 1, 'l': 3, 'o': 2, ' ': 1, 'W': 1, 'r': 1, 'd': 1}" + }, + { + "comment": "Write a Python program to store and query user data.", "code_snippet": "import json\n\nuser_data = {\"Name\": \"John\", \"Age\": 25, \"City\": \"New York\"}\n\ndef store_user_data(data):\n    with open('user_data.json', 'w') as f:\n        json.dump(data, f)\n\ndef get_user_data():\n    with open('user_data.json') as f:\n        return json.load(f)\n\n# store data in JSON file\nstore_user_data(user_data)\n\n# get data\ndata = get_user_data()\nprint(data)" + }, + { + "comment": "You are required to write a Python program to optimize an equation.", "code_snippet": "def optimize_equation(equation):\n    # count how many times each variable occurs\n    counts = {}\n    for term in equation.split('+'):\n        var = term.strip()\n        counts[var] = counts.get(var, 0) + 1\n\n    # rewrite repeated variables as powers\n    new_equation = \"\"\n    for var, count in counts.items():\n        new_equation += var + '^' + str(count) + ' + '\n    return new_equation[:-3]\n\nequation = 'x + x + y + y + y'\n\nprint(optimize_equation(equation))\n# Output: x^2 + y^3" + }, + { + "comment": "Generate Python code that takes a list of strings and generates a list of lists of words from the strings. The returned list should have the first word for each string followed by the following words for each string.", "code_snippet": "def get_word_lists(strings):\n    word_lists = []\n    for string in strings:\n        word_lists.append(string.split(\" \"))\n    return word_lists\n\nword_lists = get_word_lists([\"Hello world\", \"This is a test\"])\nprint(word_lists) # prints \"[['Hello', 'world'], ['This', 'is', 'a', 'test']]\"" + }, + { + "comment": "Create a web scraper that extracts specific data from a given website.", "code_snippet": "The web scraper should utilize the Python package BeautifulSoup to parse webpages and extract the required data. BeautifulSoup will allow us to find HTML elements on the webpage using tags and attributes. 
Once the data is found, it can be converted into the desired format and stored.\n\nFor example, to extract the URL from a list of webpages, the following code snippet can be used:\n\nimport requests\nfrom bs4 import BeautifulSoup\n\n# Get the web page\nwebpage = requests.get(\"\")\n\n# Parse the web page\nsoup = BeautifulSoup(webpage.content, 'html.parser')\n\n# Find all 'a' tags\nlinks = soup.find_all('a')\n\n# Extract the URLs in the 'href' attribute\nfor link in links:\n print(link.get('href'))" + }, + { + "comment": "Construct a script in Bash that can install a given software.", + "code_snippet": "#!/bin/bash\n\n# Download latest version of Python\ncurl -O https://www.python.org/ftp/python/3.8.5/Python-3.8.5.tgz\n\n# Extract the tgz file\ntar -xzf Python-3.8.5.tgz\n\n# Move into the Python directory\ncd Python-3.8.5\n\n# Run the configure script\n./configure\n\n# Compile and install\nmake\nmake install" + }, + { + "comment": "Write a function in Python that takes a list of integers and returns a new list containing only odd numbers.", + "code_snippet": "def odd_numbers_list(numbers):\n return [num for num in numbers if num % 2 != 0]" + }, + { + "comment": "Create a Python script that adds two numbers together and prints the result.", + "code_snippet": "def add(a, b):\n sum = a + b\n print(sum)\n\nadd(5,7)" + }, + { + "comment": "Design an algorithm in Python to find the most common elements in an array of integers.", + "code_snippet": "def most_common(nums):\n # Create a dictionary to store the counts\n counts = {}\n\n # Iterate over the elements\n for num in nums:\n # If the element is not present in the dictionary, add it\n if num not in counts:\n counts[num] = 0\n\n # Update the count\n counts[num] += 1\n\n # Get the maximum count\n max_count = max(counts.values())\n\n # Return the elements that have maximum count\n return [num for num, count in counts.items() if count == max_count]" + }, + { + "comment": "Write a Python program to find the maximum element in the given array [1, 10, 50, 3, 8].", + "code_snippet": "array = [1, 10, 50, 3, 8]\n\n# Get the maximum element\nmax_element = max(array)\n\nprint(max_element) # Output: 50" + }, + { + "comment": "Create a Python script to analyze the sentiment of a text.", + "code_snippet": "import re\nfrom nltk.stem.snowball import SnowballStemmer\nfrom nltk.tokenize import word_tokenize\nfrom nltk.corpus import stopwords\nfrom nltk.sentiment.vader import SentimentIntensityAnalyzer \n\ndef sentiment_analyzer(text):\n # Tokenize the text\n tokens = word_tokenize(text)\n\n # Create stemmer object\n stemmer = SnowballStemmer('english')\n\n # Stem each token\n stemmed_words = [stemmer.stem(word) for word in tokens]\n\n # Remove punctuation\n clean_words = [word for word in stemmed_words if re.match('[a-zA-Z-]+$', word)]\n\n # Remove stopwords\n meaningful_words = [word for word in clean_words if not word in stopwords.words('english')]\n\n # Create a SentimentIntensityAnalyzer object\n sid = SentimentIntensityAnalyzer()\n\n # Get the polarity score of the text\n polarity_score = sid.polarity_scores(' '.join(meaningful_words))\n\n return polarity_score" + }, + { + "comment": "Create a Python script that takes a list of email addresses as input and prints a filtered list of email addresses that belong to a particular domain.", + "code_snippet": "filtered_list = [email for email in list if email.endswith(\"@gmail.com\")]\n\nprint(filtered_list)\n# Output: [\"john@gmail.com\", \"sophie@gmail.com\"]" + }, + { + "comment": "Generate a Python function to 
execute a valid insertion sort on a given list of integers.", "code_snippet": "def insertion_sort(arr):\n    for i in range(1, len(arr)):\n        key = arr[i]\n        j = i - 1\n        while j >= 0 and key < arr[j]:\n            arr[j + 1] = arr[j]\n            j = j - 1\n        arr[j + 1] = key\n    return arr" + }, + { + "comment": "Generate a program to sort a list of numbers using Insertion Sort algorithm in Python.", "code_snippet": "def insertionSort(arr):\n\tfor i in range(1, len(arr)):\n\t\tcurrent = arr[i]\n\t\tj = i-1\n\t\twhile j >= 0 and arr[j] > current:\n\t\t\tarr[j+1] = arr[j]\n\t\t\tj -= 1\n\t\tarr[j+1] = current\n\treturn arr\n\nif __name__ == '__main__':\n\tarr = [2, 4, 7, 3, 8, 6, 1]\n\tprint(insertionSort(arr))" + }, + { + "comment": "Create a Python function that takes a string containing a numerical expression and returns the result.", "code_snippet": "def eval_expr(expr):\n    # note: eval executes arbitrary code, so only use it on trusted input\n    return eval(expr)" + }, + { + "comment": "Given a json-formatted string, create a python program to parse it into a dict.", "code_snippet": "import json\n\ndata = '{\"name\": \"John\",\"age\": 30,\"address\": {\"city\": \"New York\",\"state\": \"NY\"}}'\n\ndata_dict = json.loads(data)" + }, + { + "comment": "Create a Python program to compress a given string by replacing all repeating characters with a single instance and its count.", "code_snippet": "def compress(s):\n    # Initialize result\n    output = \"\"\n\n    # Initialize count\n    count = 1\n\n    # Traverse the string\n    for i in range(len(s)):\n\n        # If the current character is the same\n        # as the next one, increment its count\n        if (i+1 < len(s) and s[i] == s[i+1]):\n            count += 1\n\n        else:\n            # End of a run: append the count and the\n            # character, then reset the count\n            output += str(count) + s[i]\n            count = 1\n\n    return output\n\ns = \"aabbbbbbcccc\"\nprint(compress(s))\n# Output: 2a6b4c" + }, + { + "comment": "Create a basic CRUD API for the BlogPost model with Python/Flask. 
The API should include endpoints for creating a post, getting all posts, getting a single post, editing a post, and deleting a post.", "code_snippet": "from flask import Flask, jsonify, request\nfrom flask_sqlalchemy import SQLAlchemy\n\napp = Flask(__name__)\napp.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///test.db'\ndb = SQLAlchemy(app)\n\nclass BlogPost(db.Model):\n    id = db.Column(db.Integer, primary_key=True)\n    title = db.Column(db.String(200))\n    content = db.Column(db.Text)\n\n    def __repr__(self):\n        return '<BlogPost %r>' % self.title\n\n    # helper used by the endpoints below (referenced but not defined in the original)\n    def serialize(self):\n        return {'id': self.id, 'title': self.title, 'content': self.content}\n\n@app.route('/post', methods=['POST'])\ndef create_post():\n    data = request.get_json()\n    post = BlogPost(title=data['title'], content=data['content'])\n    db.session.add(post)\n    db.session.commit()\n    return jsonify(post.id)\n\n@app.route('/posts', methods=['GET'])\ndef get_posts():\n    posts = BlogPost.query.all()\n    return jsonify([post.serialize() for post in posts])\n\n@app.route('/post/<int:id>', methods=['GET'])\ndef get_post(id):\n    post = BlogPost.query.filter_by(id=id).first()\n    return jsonify(post.serialize())\n\n@app.route('/post/<int:id>', methods=['PUT'])\ndef edit_post(id):\n    data = request.get_json()\n    post = BlogPost.query.filter_by(id=id).first()\n    post.title = data['title']\n    post.content = data['content']\n    db.session.commit()\n    return jsonify(post.serialize())\n\n@app.route('/post/<int:id>', methods=['DELETE'])\ndef delete_post(id):\n    post = BlogPost.query.filter_by(id=id).first()\n    db.session.delete(post)\n    db.session.commit()\n    return jsonify({ 'message' : 'Post deleted' })\n\nif __name__ == '__main__':\n    app.run(debug=True)" + }, + { + "comment": "Write a Python program that calculates the distance between two points on a two-dimensional plane.", "code_snippet": "#include <iostream>\n#include <cmath>\n\nint main()\n{\n    int x1 = 3;\n    int y1 = 4;\n\n    int x2 = -9;\n    int y2 = -2;\n\n    float distance = sqrt(pow(x2 - x1, 2) + pow(y2 - y1, 2));\n\n    std::cout << \"Distance between the two points is: \" << distance << std::endl;\n\n    return 0;\n}" + }, + { + "comment": "Create an AI-based chatbot in Python that can answer simple queries about a product or service.", "code_snippet": "import random\n\n# The answers to the queries\nanswers = {\n    'What is XYZ?': 'XYZ is a company that provides XYZ services.',\n    'What does XYZ do?': 'XYZ helps customers find solutions to their problems.',\n    'Where is XYZ located?': 'XYZ is located in ABC city.',\n    'What are the benefits of using XYZ?': 'XYZ provides a quick and easy way to find solutions to your problems.'\n}\n\n# Generate a response\ndef generate_response(question):\n    if question in answers:\n        return answers[question]\n    else:\n        return random.choice(['I am not sure I understand.', 'Can you give me more information?', 'Can I help you with something else?'])\n\n# Start the conversation\nquestion = input('Welcome to XYZ. How can I help you? ')\nresponse = generate_response(question)\n\nwhile response != 'Goodbye':\n    print(response)\n    question = input('Is there anything else I can help you with? 
')\n response = generate_response(question)\n\nprint('Goodbye!')" + }, + { + "comment": "Generate a python program to read the contents from a given URL", + "code_snippet": "import requests\n\ndef read_url(url):\n r = requests.get(url)\n if r.status_code == 200:\n return r.text\n else:\n return None" + }, + { + "comment": "Code a function in Python that takes a list of integers (denoted by `lst`) and returns a list of booleans where each element of the output list is `True` if the corresponding element in the input list is a prime number, and `False` otherwise.", + "code_snippet": "def is_prime(n):\n # check if n is divisible by any of the numbers\n # between 2 and n-1\n for i in range(2, n):\n if n % i== 0:\n return False\n\n return True \n\ndef check_lst(lst):\n # generate a list of booleans \n # where each element corresponds to the corresponding\n # element of lst\n result_lst = []\n for i in range(len(lst)):\n if is_prime(lst[i]):\n result_lst.append(True)\n else:\n result_lst.append(False)\n\n return result_lst\n\nlst = [7, 8, 12, 19, 22, 23]\nprint(check_lst(lst))\n\n# Output: \n# [True, False, False, True, False, True]" + }, + { + "comment": "Generate a program in Python to output the top 3 numbers from an array of integers", + "code_snippet": "def top_three(numbers):\n sorted_numbers = sorted(numbers, reverse=True)\n return sorted_numbers[:3]\n\nprint(top_three([7, 13, 5, 8, 50, 11, 64, 48]))" + }, + { + "comment": "Using Python, build a dictionary containing the numbers from 1 to n as the key and the square of each number as the value.", + "code_snippet": "my_dict = {}\n\nfor i in range(1,n+1):\n my_dict[i] = i**2\n\nprint(my_dict)" + }, + { + "comment": "Edit the following Python program to make a correct API request and parse the response.", + "code_snippet": "import requests\n\nurl = 'https://api.example.com/users'\n\nheaders = {\n 'Authorization': 'Bearer YOUR_API_TOKEN',\n 'Content-Type': 'application/json',\n}\n\nresponse = requests.get(url, headers=headers).json()\n\nfor user in response['data']:\n print(user)" + }, + { + "comment": "Create a Python program that takes in text input from the user and searches a given list of strings to find any matches.", + "code_snippet": "import re \n \n# input string \nstrings = [\"First\",\"Second\",\"Third\"]\n\n# Get the string \ninp_str = input(\"Please enter your string :\") \n\n# Initialize the flag \nfound = False\n \n# Search the string \nfor i in strings: \n if re.search(i, inp_str):\n found = True\n\n# Print result \nif found == True: \n print(\"String matches\") \nelse: \n print(\"String doesn't matches\")" + }, + { + "comment": "Generate a program in Python to convert all characters of a string in lowercase.", + "code_snippet": "def to_lower(string):\n return string.lower()" + }, + { + "comment": "Edit the following Python program to use a dictionary instead of a list.", + "code_snippet": "items = {1: 'one', 2: 'two', 3: 'three', 4: 'four'}\n\nfor key, value in items.items():\n print(value)" + }, + { + "comment": "Construct a prime number sieve in Python which returns a list of all primes up to a given number.", + "code_snippet": "def primeSieve(n): \n prime_list = [True for i in range(n+1)] \n p = 2\n while (p * p <= n): \n if (prime_list[p] == True): \n for i in range(p * 2, n+1, p): \n prime_list[i] = False\n p += 1\n primes=[] \n for p in range(2, n): \n if prime_list[p]: \n primes.append(p) \n return primes\n\nprint(primeSieve(10)) # [2, 3, 5, 7]" + }, + { + "comment": "Write a python code to print all prime numbers 
between 1 and a given number n.", + "code_snippet": "# Function to generate prime numbers between 1 and a given number n \ndef generate_prime_numbers(n): \n \n # Array for checking if a number is prime or not\n prime_list = [True for i in range(n + 1)] \n p = 2\n while (p * p <= n): \n \n # If prime_list[p] is not changed, then it is a prime \n if (prime_list[p] == True): \n \n # Update all multiples of p \n for i in range(p * 2, n + 1, p): \n prime_list[i] = False\n p += 1\n \n # Collecting prime numbers \n for p in range(2, n): \n if prime_list[p]: \n print(p)" + }, + { + "comment": "Find the optimal solution to solve a knapsack problem using dynamic programming.", + "code_snippet": "// A Naive recursive Python \n// program for 0-1 Knapsack problem \n \n# Returns the maximum value \n# that can be put in a knapsack of \n# capacity W \ndef knapSack(W, wt, val, n): \n \n # Base Case \n if n == 0 or W == 0 : \n return 0\n \n # If weight of the nth item is more \n # than Knapsack capacity W, then \n # this item cannot be included in the optimal solution \n if (wt[n-1] > W): \n return knapSack(W, wt, val, n-1) \n \n # return the maximum of two cases: \n # (1) nth item included \n # (2) not included \n else: \n return max(val[n-1] + knapSack(W-wt[n-1], wt, val, n-1), \n knapSack(W, wt, val, n-1)) \n \n# end of function knapSack" + }, + { + "comment": "Using Python, create a function that calculates the objective function of a linear equation.", + "code_snippet": "def linear_eq(a, b, c):\n return a*x + b*y - c\n\n# For example:\nresult = linear_eq(3, 5, 7)\nprint(result)" + }, + { + "comment": "Implement an insertion sort algorithm in Python.", + "code_snippet": "def insertion_sort(list):\n for i in range(1,len(list)):\n key = list[i]\n j = i - 1\n while j>=0 and key < list[j]:\n list[j+1] = list[j]\n j-=1\n list[j+1] = key\n return list" + }, + { + "comment": "Write a python program to generate a random password given a set of letters, numbers, and characters.", + "code_snippet": "import random \n \n \n# All the possible characters \n# that could go into the password \nstring = \"abcdefghijklmnopqrstuvwxyz01234567890ABCDEFGHIJKLMNOPQRSTUVWXYZ!@#$%^&*()?\"\n \n# Using the choice function of the \n# random module choose any \n# character randomly from the set of characters \npassword = \"\".join(random.sample(string, 8)) \nprint(password)" + }, + { + "comment": "Develop a Python module to generate random numbers.", + "code_snippet": "import random\n\ndef generate_random_numbers(length, min_value, max_value):\n return [random.uniform(min_value, max_value) for _ in range(length)]" + }, + { + "comment": "How do we use recursion to solve a given problem in Python?", + "code_snippet": "Recursion is a powerful technique for solving problems. It involves defining a problem in terms of itself. A recursion problem is solved by breaking it down into smaller problems, until the problem is small enough to be solved directly. The result from a smaller problem can then be combined with the result from the larger problem, until the original problem is solved. In Python, a recursive function is one that calls itself." 
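+ }, + { + "comment": "A minimal sketch of the recursion idea described above, using a hypothetical factorial example added for illustration: the function calls itself on a smaller input until it reaches a base case it can solve directly.", + "code_snippet": "# Illustrative sketch of recursion: n! is defined in terms of (n - 1)!\ndef factorial(n):\n # base case: the problem is small enough to solve directly\n if n <= 1:\n return 1\n # recursive case: combine n with the result of the smaller problem\n return n * factorial(n - 1)\n\nprint(factorial(5)) # 120"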
+ }, + { + "comment": "Create an application using Python and Flask to submit form data and store it in a database.", + "code_snippet": "from flask import Flask, render_template, request, redirect\nimport sqlite3\n\napp = Flask(__name__)\n\n@app.route('/')\ndef home():\n return render_template('index.html')\n\n@app.route('/submit', methods=['POST'])\ndef submit():\n name = request.form.get('name')\n email = request.form.get('email')\n phone = request.form.get('phone')\n \n conn = sqlite3.connect('data.db')\n cur = conn.cursor()\n \n cur.execute(\"INSERT INTO data (name, email, phone) VALUES (?, ?, ?)\", (name, email, phone))\n conn.commit()\n \n return redirect('/')\n\t\nif __name__ == '__main__':\n app.run(debug=True)" + }, + { + "comment": "Optimize a Python program that processes a 2d array and stores the transpose output in a new array.", + "code_snippet": "def transpose(A):\n n = len(A)\n m = len(A[0])\n\n B = [[0 for x in range(n)] for x in range(m)]\n\n for i in range(n):\n for j in range(m):\n B[j][i] = A[i][j]\n\n return B" + }, + { + "comment": "Generate a Python script that takes any number of integer arguments and returns their sum.", + "code_snippet": "def sum_ints(*args):\n total = 0\n for num in args:\n total += num\n \n return total\n\nprint(sum_ints(3, 5, 7))" + }, + { + "comment": "Create a function in Python which takes a list as input and returns the sum of the items in the list.", + "code_snippet": "def sum_list(input_list):\n sum = 0\n for i in input_list:\n sum += i\n return sum" + }, + { + "comment": "Create a Selenium script in Python to automate the login process in a given website.", + "code_snippet": "from selenium import webdriver\nfrom selenium.webdriver.common.keys import Keys\nimport time\n\n# Initialize web driver\ndriver = webdriver.Chrome()\n\n# Navigate to the target web page\ndriver.get('http://www.example.com')\n\n# Locate username field\nusername = driver.find_element_by_name('username')\n\n# Enter username\nusername.send_keys('username')\n\n# Locate password field\npassword = driver.find_element_by_name('password')\n\n# Enter password\npassword.send_keys('password')\n\n# Locate submit button\nsubmit_button = driver.find_element_by_name('submit')\n\n# Click submit button\nsubmit_button.click()\n\n# Wait for login process to complete\ntime.sleep(5)\n\n# Login successful\nprint('Login successful!')\n\ndriver.close()" + }, + { + "comment": "Create an AI model in Python to classify emails from the Enron Spam Dataset as either spam or non-spam.", + "code_snippet": "import pandas as pd\nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn import svm\nfrom sklearn.metrics import precision_score\n\n# Load the dataset\ndf = pd.read_csv(\"spam.csv\", encoding='iso-8859-1')\n# Load label and data\nX = df['EmailText']\ny = df['Label']\n\n# Split data into train-test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)\n\n# Create bag of words model\ncount_vectorizer = CountVectorizer()\nX_train_counts = count_vectorizer.fit_transform(X_train)\n\n# Train the model\nclf = svm.SVC(kernel='linear')\nclf.fit(X_train_counts, y_train)\n\n# Test the model\nX_test_counts = count_vectorizer.transform(X_test)\npredictions = clf.predict(X_test_counts)\nscore = precision_score(y_test, predictions, average='binary')\n\nprint('Precision score of the model:', score)" + }, + { + "comment": "Re-write the following SQL query in Python.", + "code_snippet": "import 
sqlite3\n\nconn = sqlite3.connect('movies.db') \ncursor = conn.cursor()\n\ncursor.execute(\"SELECT * FROM Movies WHERE category = 'horror'\")\n\nresult = cursor.fetchall()\nfor row in result:\n print(row)\n\nconn.close()" + }, + { + "comment": "Take two matrices, A and B, and produce a matrix C where each element is the product of the corresponding elements of A and B.", + "code_snippet": "# Python\ndef matrixMultiplication(A, B):\n if len(A[0]) != len(B):\n return \"Matrix multiplication not possible\"\n\n # C has one row per row of A and one column per column of B\n C = [[0 for j in range(len(B[0]))] for i in range(len(A))]\n\n for i in range(len(A)):\n for j in range(len(B[0])):\n for k in range(len(B)):\n C[i][j] += A[i][k] * B[k][j]\n\n return C\n\nA = [[3, 4], \n [1, 2]]\nB = [[7, 5],\n [0, 1]]\nC = matrixMultiplication(A, B)\n\nprint(C) # Output = [[21, 19], [7, 7]]" + }, + { + "comment": "Come up with a correct approach to optimize the following python program.", + "code_snippet": "numbers = [1, 2, 3, 4, 5, 6]\n\nprint(*numbers, sep='\\n')" + }, + { + "comment": "Design a program in Python to detect the vowel in a given string.", + "code_snippet": "def vowelCheck(inputString): \n vowels = ['a','e','i','o','u','A','E','I','O','U'] \n for x in inputString: \n if x in vowels: \n return x" + }, + { + "comment": "Convert a given SQL statement into a Python Pandas DataFrame.", + "code_snippet": "import pandas as pd\n\ndf = pd.read_sql_query(\"SELECT * FROM book;\", conn)" + }, + { + "comment": "Using the data provided, generate a histogram in Python.", + "code_snippet": "import matplotlib.pyplot as plt\n\ndata = [3, 5, 7, 2, 8, 10, 11, 65, 72, 81, 99, 100, 150]\n\n# plotting the histogram \nplt.hist(data, bins = [0, 20, 40, 60, 80, 100, 120, 140, 160], edgecolor = 'black') \n \n# x-axis label \nplt.xlabel('Range of values') \n# y-axis label \nplt.ylabel('Number of Occurrences') \n# plot title \nplt.title('Histogram of data') \n \n# function to show the plot \nplt.show()" + }, + { + "comment": "Create a Python script to populate an array with random numbers from 1-100.", + "code_snippet": "import random\n\ndef populateArray(array_length):\n array = []\n for i in range(array_length):\n array.append(random.randint(1, 100))\n return array" + }, + { + "comment": "Create a natural language processing application in Python that extracts the top 10 most frequent words from a given text.", + "code_snippet": "from collections import Counter\n\ntext = \"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.\"\n\n# Split the text into individual words\nwords = text.split()\n\n# Create a counter object\ncounter = Counter(words)\n\n# Get the top 10 most frequent words\ntop_words = counter.most_common(10)\n\nprint(top_words)\n\n# Output: [('dolor', 1), ('Lorem', 1), ('ipsum', 1), ('amet', 1), ('consectetur', 1), ('adipiscing', 1), ('elit,', 1), ('sed', 1), ('do', 1), ('eiusmod', 1)]" + }, + { + "comment": "Given an array of strings, write a Python function to remove all duplicates and return the resulting array.", + "code_snippet": "def removeDuplicates(arr):\n result = [] \n \n # traverse for all elements \n for i in arr: \n \n # check if exists in result \n if i not in result: \n result.append(i) \n \n # return result \n return result" + }, + { + "comment": "Generate a Python script to find the longest common substring in two strings.", + "code_snippet": "def longest_common_substring(str1, str2):\n match = 0\n substr = \"\"\n for i in range(len(str1)):\n for j in range(len(str2)):\n l = 
0\n while (i + l < len(str1) and j + l < len(str2) and str1[i + l] == str2[j + l]):\n l += 1\n if l > match:\n substr = str1[i:i + l]\n match = l\n return substr\n\nresult = longest_common_substring(\"Helwallomst\", \"Welcome\")\nprint(result)" + }, + { + "comment": "Write a function in Python to search for a character in a given string.", + "code_snippet": "def search_char(input_str, char):\n for c in input_str:\n if c == char:\n return True\n return False" + }, + { + "comment": "Write a python program to parse an XML document and return a list of tag objects.", + "code_snippet": "import xml.etree.ElementTree as ET\n\ndef parse_xml(text):\n tag_objects = []\n\n root = ET.fromstring(text)\n for child in root:\n tag = Tag(child.attrib[\"id\"])\n for subchild in child:\n tag.add_value(subchild.attrib[\"value\"])\n tag_objects.append(tag)\n\n return tag_objects\n\nclass Tag:\n def __init__(self, tag_id):\n self.tag_id = tag_id\n self.values = []\n \n def add_value(self, value):\n self.values.append(value)" + }, + { + "comment": "Based on a given input, create a function in Python to generate a random string of length 6.", + "code_snippet": "import random\nimport string\n\ndef random_string(length):\n letters = string.ascii_lowercase\n return ''.join(random.choice(letters) for i in range(length))\n\nrandom_string(6)" + }, + { + "comment": "Create a Python program to average the elements of a 2-dimensional array.", + "code_snippet": "def averageMatrix(matrix):\n total = 0\n for row in matrix:\n for e in row:\n total += e\n\n return total / (len(matrix) * len(matrix[0]))\n\na = [[3,2,6],\n [8,5,9],\n [4,7,1]]\n\nprint(averageMatrix(a)) # 5.0" + }, + { + "comment": "Generate a code in Python to convert a number into binary format.", + "code_snippet": "\"\"\"\nGenerate a code in Python to convert a number into binary format\n\"\"\"\ndef convert_to_binary(num):\n # check if the number is 0, if so return '0'\n if num == 0:\n return '0'\n \n # initialize an empty binary string\n binary = ''\n \n # loop through the number and get the remainder \n # which is the bits of the binary\n while num != 0:\n remainder = num % 2\n binary = str(remainder) + binary\n num = num // 2\n \n return binary\n\n# test the function \nprint(convert_to_binary(10)) # 1010" + }, + { + "comment": "Generate a python program that takes an integer and then prints out the factorial of it?", + "code_snippet": "def factorial(num):\n if num <= 1:\n return 1\n else:\n return num * factorial(num - 1) \n\nn = int(input(\"Enter a number: \")) \nprint(\"Factorial of\",n,\"is\",factorial(n))" + }, + { + "comment": "Write a Python program to convert a positive integer to negative or vice versa using bitwise NOT operator.", + "code_snippet": "def convert_to_negative(x): \n return ~x + 1 \n \ndef convert_to_positive(x): \n return ~(x-1)" + }, + { + "comment": "Build a simple blockchain system in Python.", + "code_snippet": "import hashlib\nimport json\n\n# Define the Block class\nclass Block:\n def __init__(self, index, timestamp, data, previous_hash):\n self.index = index\n self.timestamp = timestamp\n self.data = data\n self.previous_hash = previous_hash\n self.hash = self.hash_block()\n\n # Create the hash of the block contents\n def hash_block(self):\n sha = hashlib.sha256()\n sha.update((str(self.index) + \n str(self.timestamp) + \n str(self.data) + \n str(self.previous_hash)).encode('utf-8'))\n return sha.hexdigest()\n\n# Create the blockchain\ndef create_blockchain():\n blockchain = [create_genesis_block()]\n previous_block = blockchain[0]\n\n # Add blocks for each item 
in the list of transactions\n for i in range(1, len(blockchain_transactions)):\n block_to_add = next_block(previous_block, blockchain_transactions[i])\n blockchain.append(block_to_add)\n previous_block = block_to_add\n\n# Print the blockchain\ndef print_blockchain():\n for block in blockchain:\n block_index = str(block.index)\n block_timestamp = str(block.timestamp)\n block_data = str(block.data)\n block_hash = block.hash" + }, + { + "comment": "Write a Python script to find the GCD of two numbers.", + "code_snippet": "def findGCD(a, b): \n if (b == 0): \n return a\n return findGCD(b, a % b)\n \na = 1000\nb = 500\nprint(\"GCD of number is: \", findGCD(a, b))" + }, + { + "comment": "Create a program in Python to find the number of occurrences of an element x in a list y.", + "code_snippet": "def count_x(x, y):\n count = 0\n for i in y:\n if i == x:\n count += 1\n return count" + }, + { + "comment": "Write a Python program to convert ratings in a list of strings to a float.", + "code_snippet": "def str_to_float(ratings):\n return [float(x) for x in ratings]" + }, + { + "comment": "Create a Python program that encrypts a message using a Caesar cipher.", + "code_snippet": "def caesar_encrypt(message, shift):\n encrypted_message = \"\"\n for c in message:\n encrypted_c = ord(c) + shift\n if encrypted_c > 126:\n encrypted_c -= 94\n encrypted_message += chr(encrypted_c)\n return encrypted_message" + }, + { + "comment": "Write a Python program to find the largest three numbers in a list.", + "code_snippet": "my_list = [1, 6, 3, 8, 7, 4]\nlargest_three = sorted(my_list)[-3:]\nprint(largest_three)" + }, + { + "comment": "Given two binary trees, check if the first tree is subtree of the second tree.", + "code_snippet": "# Python program to check if \n# a binary tree is subtree of another binary tree \n \n# A binary tree node \nclass Node: \n \n # Constructor to create a new node \n def __init__(self, data): \n self.data = data \n self.left = None\n self.right = None\n \ndef isSubTree(T, S): \n \n if S is None: \n return True\n \n if T is None: \n return False\n \n if areIdentical(T, S): \n return True\n \n return isSubTree(T.left, S) or isSubTree(T.right, S) \n \n \ndef areIdentical(T, S): \n \n if T is None and S is None: \n return True\n \n if T is None or S is None: \n return False\n \n return (T.data == S.data and areIdentical(T.left, S.left)and\n areIdentical(T.right, S.right))" + }, + { + "comment": "Write a Python program to swap the values of two variables.", + "code_snippet": "x = 5\ny = 10\n\n# Swapping process \nx, y = y, x \n\nprint(\"x:\", x) \nprint(\"y:\", y)" + }, + { + "comment": "Optimize the following Python code to use built-in functions.", + "code_snippet": "def removeDuplicates(array): \n finalArray = []\n for element in array: \n if element not in finalArray: \n finalArray.append(element)\n return finalArray" + }, + { + "comment": "Create a program in Python that takes a list of words and an integer n and returns a list of words that appear at least n times in the given list.", + "code_snippet": "from collections import Counter\n\ndef frequent_words(words, n):\n counts = Counter(words)\n return [word for word, count in counts.items() if count >= n]\n\nresult = frequent_words(words, n)\n\nprint(result) # prints ['a', 'd']" + }, + { + "comment": "Devise a Python program to determine whether a given number is even or odd.", + "code_snippet": "def even_or_odd(num):\n if num % 2 == 0:\n return \"Even\"\n else:\n return \"Odd\"" + }, + { + "comment": "Compose a basic blockchain 
system in Python.", + "code_snippet": "import hashlib\nimport json\n\n# We will use SHA256 as our hashing function\ndef sha256(data):\n hash_function = hashlib.sha256()\n hash_function.update(data.encode('utf-8'))\n return hash_function.hexdigest()\n\n# Create a class for each block\nclass Block:\n def __init__(self, index, timestamp, data, previous_hash):\n self.index = index\n self.timestamp = timestamp\n self.data = data\n self.previous_hash = previous_hash\n self.hash = sha256(f'{self.index}{self.timestamp}{self.data}{self.previous_hash}')\n\n# Create a class for the blockchain\nclass Blockchain:\n def __init__(self):\n self.chain = []\n\n def add_block(self, data):\n previous_hash = self.chain[-1].hash if len(self.chain) > 0 else None\n block = Block(len(self.chain), data['timestamp'], data['data'], previous_hash)\n self.chain.append(block)\n\n # This method will return the entire blockchain in JSON format\n def get_chain(self):\n return json.dumps([b.__dict__ for b in self.chain], indent=2)" + }, + { + "comment": "Provide a Python code that uses the exponential smoothing algorithm to predict the next day's forecast temperature given the past three day's forecast temperatures.", + "code_snippet": "\"\"\"\nThis code uses the exponential smoothing algorithm to predict the next day's forecasted temperature given the past three day's forecasted temperatures.\n\"\"\"\n\ndef predict_temperature(temperature_list):\n alpha = 0.5 #Confidence value\n predicted_temperature = temperature_list[-1] # Initializing prediction\n for temperature in temperature_list[:-1]:\n predicted_temperature = alpha*temperature + (1-alpha)*predicted_temperature\n return predicted_temperature\n\nif __name__ == '__main__':\n temperature_list = [21, 22, 20]\n print(predict_temperature(temperature_list))" + }, + { + "comment": "Write a function in Python to sort a dictionary according to its values, in descending order.", + "code_snippet": "def sortDictionary(dictionary):\n sorted_list = sorted(dictionary.items(), key=lambda x: x[1], reverse=True)\n sorted_dictionary = dict(sorted_list)\n return sorted_dictionary\n\nsorted_dictionary = sortDictionary({'apple': 2, 'banana': 3, 'mango': 1})\nprint(sorted_dictionary)" + }, + { + "comment": "Create a function in Python to generate a multiplication table with numbers in a range given start and end parameters.", + "code_snippet": "def multiplication_table(start, end):\n\tfor x in range(start, end + 1):\n\t\tfor y in range(start, end + 1):\n\t\t\tprint(x * y, end = \" \")\n\t\tprint(\"\\n\")\n\t\t\nmultiplication_table(start, end)" + }, + { + "comment": "Write a python program to identify if a string s has matching parentheses", + "code_snippet": "def matching_paren(s):\n stack = []\n for char in s:\n if char == '(':\n stack.append(char)\n elif char == ')':\n if len(stack) == 0:\n return False \n stack.pop()\n \n if len(stack) == 0:\n return True\n else:\n return False" + }, + { + "comment": "Develop a function in Python to take as input two array of integers and swap their elements.", + "code_snippet": "def swap(arr1, arr2):\n assert len(arr1) == len(arr2)\n for i in range(len(arr1)):\n temp = arr1[i]\n arr1[i] = arr2[i]\n arr2[i] = temp" + }, + { + "comment": "Can you design a function in Python for reversing a list?", + "code_snippet": "def reverse_list(list): \n return list[::-1]" + }, + { + "comment": "Create a Python script to build a dictionary from string to string of the given samples.", + "code_snippet": "def build_dictionary(samples):\n dictionary = {}\n for sample 
in samples:\n key, value = sample.split(\"->\")\n dictionary[key] = value\n return dictionary\n \nsamples = [\"apple->red\", \"banana->yellow\", \"cherry->red\"]\ndictionary = build_dictionary(samples)\n\nprint(dictionary)" + }, + { + "comment": "Write a Python script to find the average of numbers stored in an array.", + "code_snippet": "def calculate_mean(numbers):\n total_sum = 0\n for num in numbers:\n total_sum += num\n mean = total_sum / len(numbers)\n return mean\n\nprint(calculate_mean([10, 20, 30]))" + }, + { + "comment": "Generate a Python program that takes two lists as input and returns a new list containing only the common elements.", + "code_snippet": "def common_elements(list1, list2): \n common_list=[]\n for element in list1: \n if element in list2: \n common_list.append(element) \n return common_list\n \nlist1 = [1, 2, 3, 4, 8] \nlist2 = [2, 6, 8, 10]\n\ncommon_list = common_elements(list1, list2) \n\nprint(common_list)" + }, + { + "comment": "Create a loop in Python to print out all the odd numbers from 1 to 10.", + "code_snippet": "for i in range(1, 11):\n if i % 2 != 0:\n print(i)" + }, + { + "comment": "Create a machine learning model in Python that predecits customer lifetime value given a customer's past transactions.", + "code_snippet": "import pandas as pd\nimport numpy as np\n\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.ensemble import RandomForestRegressor\n\n# Preprocessing\ndf = pd.read_csv('transactions.csv')\nX = df.iloc[:,:-1].values\ny = df.iloc[:,-1].values\nscaler = StandardScaler()\nX = scaler.fit_transform(X)\n\n# Model Training\nmodel = RandomForestRegressor()\nmodel.fit(X, y)\n\n# Prediction\ncustomer_input = np.array([[10,100,5,...]]) # provide data related to past transactions\ntrans_scaled = scaler.transform(customer_input)\npredicted_LTV = model.predict(trans_scaled)[0]\nprint('Predicted customer lifetime value', predicted_LTV)" + }, + { + "comment": "Given a list of numbers and a target number, write a Python program to find a pair of numbers from the given list that add up to the given target.", + "code_snippet": "def find_pair(lst, target):\n for i in range(len(lst)):\n if (target - lst[i]) in lst[i+1:]:\n return [lst[i], target-lst[i]]\n\nfind_pair([1,2,3,4,4,5,6], 8)" + }, + { + "comment": "Develop a class in Python to perform various matrix operations like addition, subtraction, multiplication and transpose.", + "code_snippet": "class Matrix: \n def __init__(self, m, n, lst): \n self.m = m \n self.n = n \n self.mat = [] \n \n c = 0\n for i in range(self.m): \n a =[] \n for j in range(self.n): \n a.append(lst[c]) \n c += 1\n self.mat.append(a) \n \n def __str__(self): \n output = \"\"\n for i in self.mat: \n for j in i: \n output += str(j) + \" \"\n output += '\\n'\n return output \n \n def add(self, mat2): \n for i in range(self.m): \n for j in range(self.n): \n self.mat[i][j] += mat2.mat[i][j] \n \n def sub(self, mat2): \n for i in range(self.m): \n for j in range(self.n): \n self.mat[i][j] -= mat2.mat[i][j]\n \n def mul(self, mat2): \n result = [] \n for i in range(self.m): \n a =[] \n for j in range(mat2.n): \n s = 0\n for k in range(self.n): \n s += self.mat[i][k] * mat2.mat[k][j] \n a.append(s) \n result.append(a) \n self.mat = result \n self.m = len(result) \n self.n = len(result[0]) \n \n def transpose(self):\n result = [[self.mat[j][i] for j in range(len(self.mat))] for i in range(len(self.mat[0]))]\n self.mat = result \n self.m = len(result) \n self.n = len(result[0])" + }, + { + "comment": "Write a Python script 
to generate a random number between 0 and 9.", + "code_snippet": "import random\n\nrandom_num = random.randint(0,9)\nprint(random_num)" + }, + { + "comment": "Create a Python Flask app that listens for POST requests on the '/webhook' route. When a request is received, extract the necessary parameters from the request and call the 'makeWebhookResult' function to generate a response. The response should be a JSON object containing a 'speech' field with the text to be spoken by the voice assistant. The app should be started on port 5000 and run in debug mode.", + "code_snippet": "#!/usr/bin/env python\n# coding=utf-8\nimport requests\nimport urllib2\nimport json\nimport os\nfrom flask import Flask\nfrom flask import request\nfrom flask import make_response\nfrom bs4 import BeautifulSoup \n\n\n# Flask app should start in global layout\napp = Flask(__name__)\n\n\n@app.route('/webhook', methods=['POST'])\ndef webhook():\n\treq = request.get_json(silent=True, force=True)\n\tres = makeWebhookResult(req)\n \tres = json.dumps(res, indent=4)\n \tprint(res)\n \tr = make_response(res)\n \tr.headers['Content-Type'] = 'application/json'\n \treturn r\n\ndef makeWebhookResult(req):\n\tresult = req.get(\"result\")\n\tparameters = result.get(\"parameters\")\n\t\n\tif req.get(\"result\").get(\"action\") == \"productos.sura\":\n \tcliente = parameters.get(\"tipo_cliente\")\n \t\tspeech = \"Buscando productos para \" + cliente\n\t\t\n\telif req.get(\"result\").get(\"action\") == \"producto.info\":\n \tproducto = parameters.get(\"producto\")\n\t\tif (producto==\"hogar\"):\n\t\t\turl = \"https://www.sura.com/soluciones-personas/seguro-hogar.aspx\"\n\t\t\tr = urllib2.urlopen(url).read()\n\t\t\tsoup = BeautifulSoup(r, 'html.parser')\n\t\t\tprint soup\n\t\t\tcontenido = soup.find_all(\"div\",class_=\"textRightColumn\")\n\t\t\tif (len(contenido)==0):\n\t\t\t\tspeech = \"No encontrado\"\n\t\t\telse:\n\t\t\t\tspeech = contenido[0]\n\t\t\t\n\t\telse:\n \t\t\tspeech = \"Buscando informacion del producto \" + producto\n \n \telif req.get(\"result\").get(\"action\") == \"planes.salud\":\n \turl = \"https://api.segurossura.com.co/public/v1/directory/products\"\n \tmyResponse = requests.get(url)\n\n \tif(myResponse.ok):\n\t\t\tjData = json.loads(myResponse.text)\n\t\t\t\n\t\tspeech = \"Seguros Sura Colombia ofrece los siguientes planes de salud: \\n\"\n\t\t\n \tfor plan in jData:\n\t \t\tspeech = speech + \"\\n\" + plan[\"nombreField\"].title()\n\t\t\t\n\telif req.get(\"result\").get(\"action\") == \"info.especialistas\":\n\t\tproducto = parameters.get(\"plan-salud\")\n\t\tciudad = parameters.get(\"ciudad\")\n\t\tespecialidad = parameters.get(\"especialidad\")\n\t\t\n\t\turl = \"https://api.segurossura.com.co/public/v1/directory/search/\" + producto + \"/\" + ciudad + \"?speciality=\" + especialidad + \"&firstname=&secondname=&firstlastname=&secondlastname=\"\n\t\tmyResponse = requests.get(url)\n\n\t\tif(myResponse.ok):\n\t\t\tjData = json.loads(myResponse.text)\n\t\t\n\t\tspeech = \"Los profesionales que coinciden con tu busqueda son: \\n\"\n\n\t\tfor medico in jData:\n\t \t\tspeech = speech + \"\\n\" + medico[\"nombreField\"] + \"\\n Direccion: \" + medico[\"direccionField\"].title() + \"\\n Telefono: \" + medico[\"telefonoField\"] + \"\\n\"\n\t\t\t\n\telif req.get(\"result\").get(\"action\") == \"coberturas.producto\":\n\t\tproducto = parameters.get(\"productos\")\n\t\tspeech = \"Buscando coberturas del producto: \" + producto\n\telse:\n \tspeech =\" \"\n\n\treturn {\n \"speech\": speech,\n \"displayText\": speech,\n 
#\"data\": {},\n # \"contextOut\": [],\n \"source\": \"apiai-onlinestore-shipping\"\n \t}\n\n\nif __name__ == '__main__':\n\tport = int(os.getenv('PORT', 5000))\n \tprint \"Starting app on port %d\" % port\n \tapp.run(debug=True, port=port, host='0.0.0.0')\n" + }, + { + "comment": "Modify an existing Python function to eliminate any redundancies", + "code_snippet": "def just_me(name):\n print(\"Hi, my name is \"+name+\". I am special and nothing else matters.\")\n print(\"What about you?\")" + }, + { + "comment": "Create a simple chatbot using Python and state diagrams.", + "code_snippet": "# Create the states\nstate0 = {\n 'name': 'state0',\n 'message': 'Hello! How can I help you?',\n 'replies': {'I need some advice': 'state1', 'Can you answer my questions?': 'state2'}\n}\n\nstate1 = {\n 'name': 'state1',\n 'message': 'Sure, what kind of advice do you need?',\n 'replies': {'Career advice': 'state3','Health advice': 'state4'}\n}\n\nstate2 = {\n 'name': 'state2',\n 'message': 'Yes, I'll do my best! What do you want to know?',\n 'replies': {'Where can I find good tutorials': 'state5', 'Why is coding so hard': 'state6'}\n}\n\nstate3 = {\n 'name': 'state3',\n 'message': 'It is always a good idea to research the field you are interested in and creating a plan to learn the skills that are required. Make sure to contact professionals in the field to understand their experience.',\n 'replies': {}\n}\n\nstate4 = {\n 'name': 'state4',\n 'message': 'Focus on eating healthy and staying active. Make sure to consult a medical professional if you have any concerns.',\n 'replies': {}\n}\n\nstate5 = {\n 'name': 'state5',\n 'message': 'You can find tutorials online or join coding communities to get help from experienced coders. StackOverflow and Cod Academy are great resources.',\n 'replies': {}\n}\n\nstate6 = {\n 'name': 'state6',\n 'message': 'Coding can be challenging, especially at first. Just remember that with practice, patience and problem solving skills you can overcome the obstacles.',\n 'replies': {}\n}\n\n\n# Initialize the state\ncurrent_state = state0\n\n# Keep a record of conversation\nconversation = []\n\nwhile True:\n # Print the current state message\n print(current_state['message'])\n conversation.append(current_state['message'])\n\n # Get the user input\n user_input = input('>> ')\n conversation.append(user_input)\n\n # Update the state\n if user_input in current_state['replies']:\n current_state = eval(current_state['replies'][user_input])\n else:\n print(\"I don't understand what you mean. 
Please try again.\")\n\n# End the conversation when the state is terminal\n if not bool(current_state['replies']):\n print('Goodbye!')\n break\n\nprint(\"=========================\")\nprint(\"Conversation:\")\nfor message in conversation:\n print(message)" + }, + { + "comment": "Create a Python 3 program that takes an integer input from the user and prints its cube.", + "code_snippet": "number = int(input(\"Please enter a number: \"))\ncube = number**3\nprint(\"The cube of the number is: \" + str(cube))" + }, + { + "comment": "Write a Python program to find the first non-repeating character in a given list.", + "code_snippet": "def first_non_repeating_char(lst):\n dct = {}\n \n for l in lst:\n if l in dct:\n dct[l] += 1\n else:\n dct[l] = 1\n \n for l in lst:\n if dct[l] == 1:\n return l\n \n return None\n\nlst = [1, 2, 3, 4, 4, 3, 2, 1]\nprint(first_non_repeating_char(lst))" + }, + { + "comment": "Create a k-means clustering algorithm in Python.", + "code_snippet": "import numpy as np \n\ndef kmeans(k, data): \n \"\"\"\n Perform k-means clustering on data\n \"\"\"\n # randomly set initial centroids\n centroids = data[np.random.randint(low=0, high=len(data), size=k)]\n prev_centroids = np.zeros(centroids.shape)\n\n clusters = np.zeros(len(data))\n distances = np.zeros((len(data), k))\n \n # find the closest centroid for each sample\n while not np.allclose(centroids, prev_centroids):\n # save old centroids\n prev_centroids = centroids\n \n # get the distance between each point and each centroid\n for idx, centroid in enumerate(centroids):\n distances[:, idx] = np.linalg.norm(data - centroid, axis=1)\n \n # assign each sample to the closest centroids\n clusters = np.argmin(distances, axis=1)\n \n # update centroids \n for idx in range(k):\n centroids[idx] = np.mean(data[clusters == idx], axis=0)\n \n return clusters, centroids" + }, + { + "comment": "Write a Python code snippet that produces the following output:", + "code_snippet": "for i in range(7):\n for j in range(7):\n if (i + j) % 2 == 0:\n print('#', end='')\n else:\n print(' ', end='')\n print()" + }, + { + "comment": "Construct a Python class that takes a given number and calculates the sum of the digits.", + "code_snippet": "class SumOfDigits:\n \n def __init__(self, num):\n self.num = num\n \n def calculate_sum_of_digits(self):\n total = 0\n for ch in str(self.num):\n total += int(ch)\n return total\n\ndigit_sum = SumOfDigits(6342)\nresult = digit_sum.calculate_sum_of_digits()\nprint(result)" + }, + { + "comment": "Implement a sorting algorithm in Python that takes a list of integers as input and outputs the list sorted in ascending order.", + "code_snippet": "def sort_ascending(nums):\n for i in range(len(nums)):\n min_idx = i\n for j in range(i + 1, len(nums)):\n if nums[min_idx] > nums[j]:\n min_idx = j\n nums[i], nums[min_idx] = nums[min_idx], nums[i] \n return nums" + }, + { + "comment": "Create a feature extraction function in Python to extract data from a json string.", + "code_snippet": "def feature_extraction(input_str):\n import json\n data = json.loads(input_str)\n\n features = {\n \"id\": data[\"id\"],\n \"title\": data[\"title\"],\n \"body_len\": len(data[\"body\"])\n }\n\n return features" + }, + { + "comment": "Construct a Python program to combine the elements of two lists into a list of tuples.", + "code_snippet": "def combine_list(array1, array2):\n return list(zip(array1, array2))" + }, + { + "comment": "Design and implement a neural network in Python to predict the price of a house given a set of features.", + 
"code_snippet": "import numpy as np\nfrom keras.models import Sequential\nfrom keras.layers import Dense\n\nfeatures = np.array([[0.8, 0.4, 0.7]])\n\nmodel = Sequential()\nmodel.add(Dense(64, input_dim=3, activation='relu'))\nmodel.add(Dense(32, activation='relu'))\nmodel.add(Dense(1, activation='linear'))\n\nmodel.compile(loss='mse', optimizer='adam', metrics=['mse'])\n\nmodel.fit(features, batch_size=3,epochs=200)" + }, + { + "comment": "Create a Python program that takes two arguments in the form of two lists and calculates the dot product of the two arguments.", + "code_snippet": "def dot_product(list1, list2):\n product = 0\n for i in range(len(list1)):\n product += (list1[i] * list2[i])\n return product\n#Example\nlist1 = [1,2]\nlist2 = [3,4]\n\nresult = dot_product(list1, list2)\nprint(result)\n#Output\n11" + }, + { + "comment": "Create a Python web application for a car insurance calculator. The application should have a page where the user can input the data (e.g age, gender, car model, etc) and should return a calculated insurance price.", + "code_snippet": "import flask\nfrom flask import request, jsonify\n\n# Creating the application app\napp = flask.Flask(__name__)\n\n\n@app.route('/calculate', methods=['POST'])\ndef calculate():\n # Get the input data from the user\n data = request.get_json()\n\n age = data['age']\n gender = data['gender']\n car_model = data['car_model']\n\n # Calculate the insurance price\n # ...\n insurance_price = 0.00\n\n # Return the calculated insurance price in JSON format\n return jsonify({'insurance_price': insurance_price})\n\t\napp.run(host='0.0.0.0', port=8006)" + }, + { + "comment": "Create a Python script that generates a list of the prime numbers between two given numbers. The script should use the Sieve of Eratosthenes algorithm to generate the prime numbers", + "code_snippet": "def sieve_of_eratosthenes(start, end):\n prime = [True] * (end + 1)\n p = 2\n while p * p <= end:\n if prime[p]:\n for i in range(p * p, end + 1, p):\n prime[i] = False\n p += 1\n prime_numbers = []\n\n for p in range (start, end):\n if prime[p]:\n prime_numbers.append(p)\n\n return prime_numbers\n\nprint (sieve_of_eratosthenes(1, 20))" + }, + { + "comment": "Construct a program in Python to compute the sum of square numbers in a given range, including the start and end numbers.", + "code_snippet": "def sumOfRanges(start, end):\n sum = 0\n \n for i in range(start, end+1):\n sum += i*i\n \n return sum\n\nsumOfRanges(10,20) #673" + }, + { + "comment": "In Python, build a program that can remove all vowels from a given string.", + "code_snippet": "def remove_vowels(string):\n vowels = ['a','e','i','o','u']\n new_string = ''\n for char in string:\n if char.lower() not in vowels:\n new_string += char\n return new_string" + }, + { + "comment": "Improve the following function by using Python built-in functions:", + "code_snippet": "def sum_array(array):\n '''A function that takes in an array of numbers and returns the sum of all the numbers'''\n return sum(array)" + }, + { + "comment": "Write a python code for sorting n distinct integers using insertion sort.", + "code_snippet": "def insertion_sort(arr):\n for i in range(1, len(arr)): \n \n key = arr[i] \n \n # Move elements of arr[0..i-1], \n # that are greater than key, \n # to one position ahead of \n # their current position \n j = i-1\n while j >= 0 and key < arr[j] : \n arr[j + 1] = arr[j] \n j -= 1\n arr[j + 1] = key" + }, + { + "comment": "Create a Python script for a django-like ORM with standard CRUD functionality. 
The script should include methods for Create, Read, Update and Delete.", "code_snippet": "import sqlite3\n\nclass Model:\n\n def __init__(self):\n self.conn = sqlite3.connect(\":memory:\")\n self.cursor = self.conn.cursor()\n\n def create(self,name):\n self.cursor.execute(\n\t\t\t\"CREATE TABLE IF NOT EXISTS Models (name TEXT)\"\n\t\t)\n self.cursor.execute(\n\t\t\t\"INSERT INTO Models(name) VALUES (?)\",\n\t\t\t(name,),\n\t\t)\n self.conn.commit()\n\n def read(self):\n self.cursor.execute(\"SELECT * FROM Models\")\n return self.cursor.fetchall()\n\n def update(self, old_name, new_name):\n # rename an existing row; using two distinct parameters makes this a real update\n self.cursor.execute(\n\t\t\t\"UPDATE Models SET name = ? WHERE name = ?\", \n\t\t\t(new_name, old_name,),\n\t\t)\n self.conn.commit()\n \n def delete(self, name):\n self.cursor.execute(\n\t\t\t\"DELETE FROM Models WHERE name = ?\",\n\t\t\t(name,),\n\t\t)\n self.conn.commit()" + }, + { + "comment": "Can you write a Python script to sort a list of strings alphabetically?", + "code_snippet": "strings = [\"Hello\", \"Yes\", \"No\", \"World\"]\n\nstrings.sort()\n\nprint(strings)" + }, + { + "comment": "Write a class in Python that can find the median of an unsorted list of numbers.", + "code_snippet": "class MedianFinder:\n def __init__(self):\n self._data = []\n \n def add(self, num):\n self._data.append(num)\n \n def find_median(self):\n data = sorted(self._data)\n n = len(data)\n return (data[(n-1)//2] + data[n//2])/2" + }, + { + "comment": "Generate an array of random numbers in Python.", + "code_snippet": "import numpy as np\n\nsize = 10\nrandom_array = np.random.randint(0, 100, size)\nprint(random_array)" + }, + { + "comment": "Compute the edit distance between two strings in Python.", + "code_snippet": "def editDistance(string1, string2, m, n):\n # Create an empty matrix\n dp = [[0 for x in range(n+1)] for x in range(m+1)] \n \n # Filling the first column of the matrix\n for i in range(m+1): \n dp[i][0] = i \n \n # Filling the second column of the matrix \n for j in range(n+1): \n dp[0][j] = j \n \n # Populate the matrix\n for i in range(1, m+1): \n for j in range(1, n+1): \n \n if string1[i-1] == string2[j-1]: \n dp[i][j] = dp[i-1][j-1] \n else: \n dp[i][j] = 1 + min(dp[i][j-1], # Insert\n dp[i-1][j], # Remove \n dp[i-1][j-1]) # Replace\n \n return dp[m][n] \n \n \n# Driver program \nstring1 = \"kitten\"\nstring2 = \"sitting\"\nm = len(string1) \nn = len(string2) \nprint(editDistance(string1, string2, m, n))" + }, + { + "comment": "Create a 3-layer artificial neural network using Python and print the outcome.", + "code_snippet": "import numpy as np\n\n# define the 3 layers #\ninput_layer = np.array([2, 3])\nhidden_layer = np.array([[0.1, 0.4],\n [0.8, 0.6],\n [0.3, 0.9]])\n# one weight per hidden unit (a third value is assumed here so the shapes align)\noutput_layer = np.array([0.3, 0.7, 0.5])\n\n# compute the output of the 3-layer network #\n# hidden_layer is (3, 2), so multiply it with the (2,) input to get the (3,) hidden activations\nhidden_layer_output = np.dot(hidden_layer, input_layer)\noutput = np.dot(hidden_layer_output, output_layer)\n\nprint(output)" + }, + { + "comment": "Create a Python program to read in 15 integers and print the sum of them.", + "code_snippet": "def sum_nums(nums):\n total = 0\n for num in nums:\n total += num\n return total\n\nlst = [1, 5, 9, 0, 2, 4, 7, 8, 6, 10, 3, 11, 12, 13, 14]\ntotal = sum_nums(lst)\nprint(total)" + }, + { + "comment": "Implement a REST API in Python for creating and reading users. 
The API should have endpoints for creating a user, reading all users, and reading a single user.", "code_snippet": "from flask import Flask, jsonify, request\nfrom flask_mysqldb import MySQL\n\napp = Flask(__name__)\n\n# Mysql configuration\napp.config['MYSQL_USER'] = 'root'\napp.config['MYSQL_PASSWORD'] = 'password'\napp.config['MYSQL_DB'] = 'users'\napp.config['MYSQL_CURSORCLASS'] = 'DictCursor'\n\nmysql = MySQL(app)\n\n@app.route('/users', methods=['POST'])\ndef create_user():\n name = request.json['name']\n email = request.json['email']\n\n cur = mysql.connection.cursor()\n cur.execute(\"INSERT INTO users (name, email) VALUES (%s, %s)\", (name, email))\n mysql.connection.commit()\n cur.close()\n\n return jsonify({'message': 'User created'})\n\n@app.route('/users', methods=['GET'])\ndef get_users():\n cur = mysql.connection.cursor()\n cur.execute(\"SELECT * FROM users\")\n users = cur.fetchall()\n cur.close()\n\n return jsonify(users)\n\n@app.route('/users/<int:id>', methods=['GET'])\ndef get_user(id):\n cur = mysql.connection.cursor()\n cur.execute(\"SELECT * FROM users WHERE id=%s\", (id,))\n user = cur.fetchone()\n cur.close()\n\n return jsonify(user)\n\nif __name__ == \"__main__\":\n app.run(debug=True)" + }, + { + "comment": "Explain the approach for solving a linear programming problem using the Simplex method in Python.", + "code_snippet": "The Simplex Method is an algorithm for solving a linear programming problem, which involves finding the maximum or minimum value of a linear function. This method uses simultaneous equations to define a polytope that contains the solution to the linear programming problem. It then finds the optimal solution in this polytope by iteratively moving from one vertex to another vertex of the polytope that gives an increase in the objective value. \n\nTo solve a linear programming problem using the Simplex Method in Python, first, define the problem in terms of the decision variables, objective function, and constraints. Next, convert the problem into standard form by introducing slack variables, if needed. Then, create a tableau containing the coefficients of the decision variables and the right-hand sides of the constraints, and set up the starting solution in the tableau. Now, iteratively use the Simplex algorithm to move from a vertex to the next until an optimum solution is reached. Determine the optimum solution by inspecting the values in the tableau."
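+ }, + { + "comment": "A minimal sketch of the linear-programming workflow described above, using scipy.optimize.linprog to do the pivoting; the objective and constraints here are made-up illustration values.", + "code_snippet": "# Illustrative sketch: maximize 3x + 2y subject to x + y <= 4, x + 3y <= 6, x >= 0, y >= 0\n# linprog minimizes, so the objective is negated; scipy performs the simplex-style pivoting internally\nfrom scipy.optimize import linprog\n\nc = [-3, -2] # negated objective coefficients\nA_ub = [[1, 1], [1, 3]] # constraint matrix for the <= constraints\nb_ub = [4, 6] # right-hand sides\nres = linprog(c, A_ub=A_ub, b_ub=b_ub, bounds=[(0, None), (0, None)])\n\nprint(res.x) # optimal point, here [4. 0.]\nprint(-res.fun) # maximum objective value, here 12.0"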
+ }, + { + "comment": "Optimize a Python algorithm to find the greatest 2 numbers from a 10000 integers list.", "code_snippet": "def get_greatest_2(items):\n max1 = max2 = float('-inf')\n for item in items:\n if item > max1:\n max1, max2 = item, max1\n elif item > max2:\n max2 = item\n return max1, max2\n\nmyList = [1, 5, 3, 7, 4, 7, 8, 9, 2, 10, 6, 8]\nprint('The greatest 2 numbers from the array:', get_greatest_2(myList))\n\n# Output:\n# The greatest 2 numbers from the array: (10, 9)" + }, + { + "comment": "Write a python code that takes in a list of integers and removes all the duplicate elements from it.", + "code_snippet": "final_list = [] \nfor num in list_of_integers: \n if num not in final_list: \n \tfinal_list.append(num) \nprint(final_list)" + }, + { + "comment": "Using the NumPy library, create a Python program to generate a 3X3 array of random integers ranging from 1 to 5.", + "code_snippet": "import numpy as np \n\na = np.random.randint(1, 6, size=(3,3)) \nprint(a)" + }, + { + "comment": "Can you provide a code snippet in Python that takes a list of strings as input and sorts them based on their length?", + "code_snippet": "words = [\"Hello\",\"World!\",\"Good\",\"Morning\"]\n\nwords.sort(key=len)\n\nprint(words)" + }, + { + "comment": "Create a Python program to classify the sentiment of movie reviews using Natural Language Processing.", + "code_snippet": "import nltk\nfrom nltk.sentiment import vader\n\nsentence = 'The movie was terrible. It felt like it would never end!'\n\nsid = vader.SentimentIntensityAnalyzer()\nscores = sid.polarity_scores(sentence)\n\nif scores['neg'] > scores['pos']:\n print('The sentiment of the sentence is negative!')\nelse:\n print('The sentiment of the sentence is positive!')" + }, + { + "comment": "Create a python script that parses a CSV and generates a new CSV that adds the total number of rows, total number of columns, and total amount of the data for each attribute.", + "code_snippet": "\"\"\"\nParses a CSV and generates a new CSV that adds the total number of rows, total \nnumber of columns, and total amount of data for each attribute.\n\"\"\"\n\nimport csv\n\ndef parse_csv(filename):\n with open(filename) as csv_file:\n reader = csv.DictReader(csv_file)\n rows = 0\n fieldnames = reader.fieldnames\n cols = len(fieldnames)\n # running total of the data in each column\n data_totals = {name: 0.0 for name in fieldnames}\n for row in reader:\n rows += 1\n for col in fieldnames:\n data_totals[col] += float(row[col])\n # write to new csv\n with open('new_csv.csv', mode='w') as new_csv_file:\n writer = csv.writer(new_csv_file, delimiter=',', quotechar='\"', quoting=csv.QUOTE_MINIMAL)\n writer.writerow(['Rows', 'Columns', 'Data Totals'])\n writer.writerow([rows, cols, data_totals])\n\nif __name__ == '__main__':\n filename = 'test.csv'\n parse_csv(filename)" + }, + { + "comment": "Create a class in Python which matches the following criteria.", + "code_snippet": "class Song:\n def __init__(self, title, artist):\n self.title = title\n self.artist = artist\n \n def __str__(self):\n return f'Title: {self.title}\\nArtist: {self.artist}'\n \n def __eq__(self, other):\n return self.artist == other.artist and self.title == other.title\n\n def length(self):\n return len(self.title)" + }, + { + "comment": "Create a Python script to scrape a given website.", + "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\n\nurl = ''\npage = requests.get(url)\nsoup = BeautifulSoup(page.text, 
'html.parser')\n\n# Scraping code goes here...." + }, + { + "comment": "Create objects of type Student in Python containing relevant attributes.", + "code_snippet": "class Student:\n def __init__(self, name, age, gender):\n self.name = name\n self.age = age\n self.gender = gender" + }, + { + "comment": "Design and write a Python program to extract all public email addresses from a given string", + "code_snippet": "import re\n\ndef extract_emails(text):\n emails = re.findall(r\"[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9-.]+\", text)\n return emails\n\nextracted_emails = extract_emails(\"My email is john@example.com and yours is john@gmail.com\")\nprint(extracted_emails)" + }, + { + "comment": "Generate a basic REST API with authentication and endpoints in Python.", + "code_snippet": "from flask import Flask, request\nfrom flask_restful import Resource, Api\nfrom flask_jwt import JWT, jwt_required\n\napp = Flask(__name__)\napi = Api(app)\n\n# Setup JWT authentication\n# (the authenticate and identity callbacks required by flask_jwt are assumed to be defined elsewhere)\napp.secret_key = 'super-secret-key'\njwt = JWT(app, authenticate, identity)\n\nitems = []\n\nclass Item(Resource):\n @jwt_required()\n def get(self, name):\n for item in items:\n if item['name'] == name:\n return item\n return {'item': None}, 404\n\n def post(self, name):\n new_item = {'name': name, 'price': 12.00}\n items.append(new_item)\n return new_item, 201\n\n\nclass ItemList(Resource):\n def get(self):\n return {'items': items}\n\n\napi.add_resource(Item, '/item/<string:name>')\napi.add_resource(ItemList, '/items')\n\nif __name__ == '__main__':\n app.run(port=5000, debug=True)" + }, + { + "comment": "Generate a set of Python functions to take inputs values from a list of objects and output a result. The output for each object should include the object's ID and the calculated results from the given inputs.", + "code_snippet": "def calculate_result(obj_list):\n result_list = []\n\n for obj in obj_list:\n result = {}\n result['id'] = obj['id']\n result['result'] = sum(obj['inputs'])\n\n result_list.append(result)\n\n return result_list\n\nobj_list = [{\"id\": 1, \"inputs\": [1,2,3]}, \n{\"id\": 2, \"inputs\": [2,3,4]}]\n\nprint(calculate_result(obj_list))\n\n# Output: [{'id': 1, 'result': 6}, {'id': 2, 'result': 9}]" + }, + { + "comment": "Reverse an input string using Python.", + "code_snippet": "def rev_string(s): \n return s[::-1] \n \ns = \"Hello World!\"\nprint(rev_string(s))" + }, + { + "comment": "Create an algorithm in Python to find the longest common subsequence of two sequences.", + "code_snippet": "#Function to find the LCS of two strings\ndef find_lcs(x, y): \n # Find lengths of the two strings \n m = len(x) \n n = len(y) \n \n #To store the lengths of longest common subsequences\n lcs = [[0 for x in range(n+1)] for x in range(m+1)] \n \n #Fill the table in bottom up manner \n for i in range(m+1): \n for j in range(n+1): \n if i == 0 or j == 0: \n lcs[i][j] = 0\n elif x[i-1] == y[j-1]: \n lcs[i][j] = lcs[i-1][j-1] + 1\n else: \n lcs[i][j] = max(lcs[i-1][j], lcs[i][j-1]) \n \n #To store one of the possible common subsequence\n index = lcs[m][n] \n lcs_str = [\"\"] * (index+1) \n lcs_str[index] = \"\"\n \n #Following loop code is used to find one of the longest common subsequence\n i = m \n j = n \n while i > 0 and j > 0: \n \n #If current character in X and Y are same,then\n # reduce both the count and add the character to the result string\n if x[i-1] == y[j-1]: \n lcs_str[index-1] = x[i-1] \n i-=1\n j-=1\n index-=1\n \n # If not same, move to the left, top or corner (diagonal) cell \n #Whichever has the max value\n elif lcs[i-1][j] > 
lcs[i][j-1]: \n i-=1\n else: \n j-=1\n \n # Return the longest common subsequence\n return \"\".join(lcs_str)\n\n# Main function\nx = \"abcdaf\"\ny = \"acbcf\"\nres = find_lcs(x,y)\nprint(\"The Longest Common Subsequence is:\")\nprint(res)\n\n# Output: The Longest Common Subsequence is: \n# abcf" + }, + { + "comment": "Write a python script to generate a random password of length 8, which consists of alphanumeric characters and at least one special character.", + "code_snippet": "import string\nimport random\n\ndef generate_password():\n chars = string.ascii_letters + string.digits + string.punctuation\n pwd_length = 8\n\n password = ''\n for i in range(pwd_length):\n password += random.choice(chars)\n \n # Check if at least one special character exists in the password\n if any([char in string.punctuation for char in password]):\n return password\n else:\n return generate_password()\n\nprint(generate_password())" + }, + { + "comment": "Create a Python script that takes the list [1, 2, 3, 4, 5] and returns the sum.", + "code_snippet": "def sum_list(lst):\n total = 0\n for num in lst:\n total += num\n return total\n\nsum_list([1,2,3,4,5])" + }, + { + "comment": "Edit the Python program to sort words in a given string by their length in decreasing alphabetical order.", + "code_snippet": "my_string = \"This is the string which is to be sorted\"\n \nwords = my_string.split(' ') # split string into list of words \n \n# create a list of tuples where the first element \n# is the length of the word and the second element \n# is the word itself \nwords_with_length = [(len(word), word) for word in words] \n \n# sort list of tuples according to 1st element of tuple i.e. length of word \nwords_with_length.sort(reverse = True) \n \n# wl contains list words in decreasing order of their length \n# now use join() to join all words whith \" \" \nsorted_string = \" \".join([i[1] for i in words_with_length]) \n \n# print sorted string \nprint(sorted_string)" + }, + { + "comment": "Write a python program to calculate maximum revenue.", + "code_snippet": "def max_revenue(prices):\n diff = [prices[i + 1] - prices[i] for i in range(len(prices) - 1)] \n max_diff = 0\n for i in range(len(diff)): \n for j in range(i + 1, len(diff) + 1): \n current_diff = sum(diff[i:j]) \n if current_diff > max_diff: \n max_diff = current_diff \n \n return max_diff" + }, + { + "comment": "Write a Python program to print the following output pattern", + "code_snippet": "for i in range(1,6):\n for j in range(1,i+1):\n print(j, end=\"\")\n print()" + }, + { + "comment": "Develop a loop that prints a python array from the end to the beginning.", + "code_snippet": "def printArrayReverse(arr): \n for i in range(len(arr) - 1, -1, -1): \n print(arr[i], end = \" \")\n\nprintArrayReverse(arr)" + }, + { + "comment": "Given a list of phone numbers, extract only the ones that start with 08 using Python.", + "code_snippet": "phone_numbers = ['0834567', '0945678', '07123456', '08901234', '07890123']\nresult = [number for number in phone_numbers if number.startswith('08')]\nprint(result)" + }, + { + "comment": "Write a Python program that takes a list and a search query as input and returns the indices of all elements in the list which match the search query.", + "code_snippet": "def search_list(list, search_query):\n result = []\n for i in range(len(list)):\n if list[i] == search_query:\n result.append(i)\n return result\n\nsearch_list(list, search_query) # returns [0]" + }, + { + "comment": "Write a function in Python that takes an array of 
integer values and reverses the order of the values.", + "code_snippet": "def reverse_array(arr):\n return arr[::-1] # Returns a slice of the list in reverse order" + }, + { + "comment": "Write a function in Python to create a random string of 10 characters.", + "code_snippet": "import random\n\ndef generate_string(string_length):\n random_characters = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'\n random_string = ''.join(random.choices(random_characters, k=string_length))\n return random_string\n\nprint(generate_string(10)) # Length: 10" + }, + { + "comment": "Develop a Tic Tac Toe game in Python.", + "code_snippet": "# define the board\nboard = [\" \" for i in range(9)]\n\ndef print_board():\n row1 = \"| {} | {} | {} |\".format(board[0], board[1], board[2])\n row2 = \"| {} | {} | {} |\".format(board[3], board[4], board[5])\n row3 = \"| {} | {} | {} |\".format(board[6], board[7], board[8])\n\n print()\n print(row1)\n print(row2)\n print(row3)\n print()\n\n# define player turn\ndef player_turn(icon):\n\n if icon == \"X\":\n number = 1\n elif icon == \"O\":\n number = 2\n \n print(\"Your turn player {}\".format(number))\n \n choice = int(input(\"Enter your move (1-9): \").strip())\n if board[choice -1] == \" \":\n board[choice -1] = icon\n else:\n print()\n print(\"That space is taken!\")\n\n# check win\ndef is_victory(icon):\n if (board[0] == icon and board[1] == icon and board[2] == icon) or \\\n (board[3] == icon and board[4] == icon and board[5] == icon) or \\\n (board[6] == icon and board[7] == icon and board[8] == icon) or \\\n (board[0] == icon and board[3] == icon and board[6] == icon) or \\\n (board[1] == icon and board[4] == icon and board[7] == icon) or \\\n (board[2] == icon and board[5] == icon and board[8] == icon) or \\\n (board[0] == icon and board[4] == icon and board[8] == icon) or \\\n (board[2] == icon and board[4] == icon and board[6] == icon):\n return True\n else:\n return False\n \n# define main function\ndef main():\n print(\"Welcome to Tic Tac Toe!\")\n print_board()\n \n while True:\n player_turn(\"X\")\n print_board()\n if is_victory(\"X\"):\n print(\"X Wins! Congratulations!\")\n break\n elif not \" \" in board:\n print(\"It's a tie!\")\n break\n\n player_turn(\"O\")\n print_board()\n if is_victory(\"O\"):\n print(\"O Wins! 
Congratulations!\")\n break\n elif not \" \" in board:\n print(\"It's a tie!\")\n break\n\n\nmain()" + }, + { + "comment": "Write Python code to merge two sorted lists.", + "code_snippet": "def merge_sorted_lists(listA, listB): \n merged_list = []\n ptrA = 0\n ptrB = 0\n while ptrA < len(listA) and ptrB < len(listB): \n if listA[ptrA] <= listB[ptrB]: \n merged_list.append(listA[ptrA]) \n ptrA += 1\n else: \n merged_list.append(listB[ptrB]) \n ptrB += 1\n while ptrA < len(listA): \n merged_list.append(listA[ptrA]) \n ptrA += 1\n while ptrB < len(listB):\n merged_list.append(listB[ptrB])\n ptrB += 1\n return merged_list" + }, + { + "comment": "Create a Python program to find the longest common subsequence between two strings.", + "code_snippet": "# Program to find the longest common subsequence between two strings \ndef lcs(X, Y): \n # find the length of the strings \n m = len(X) \n n = len(Y) \n\n # declaring the array for storing the dp values \n L = [[None]*(n+1) for i in range(m+1)] \n\n # Following steps to build L[m+1][n+1] in bottom up manner \n for i in range(m+1): \n for j in range(n+1): \n if i == 0 or j == 0 : \n L[i][j] = 0\n elif X[i-1] == Y[j-1]: \n L[i][j] = L[i-1][j-1]+1\n else: \n L[i][j] = max(L[i-1][j] , L[i][j-1]) \n\n # L[m][n] contains the length of LCS of X[0..n-1] & Y[0..m-1] \n return L[m][n] \n\n# Driver program \nX = \"ABACEDF\"\nY = \"ABDCF\"\n\nprint(\"Length of LCS is \", lcs(X, Y))" + }, + { + "comment": "Construct a naive bayes classifier using python to classify emails as spam or not spam.", + "code_snippet": "from sklearn.naive_bayes import GaussianNB\n\n# Create a Gaussian Classifier\nmodel = GaussianNB()\n\n# Train the model using the training sets \nmodel.fit(X_train, y_train)\n\n# Predict Output \ny_pred = model.predict(X_test)" + }, + { + "comment": "This code performs a series of calculations and plots based on input data. Before running the script, run eval_spectrum.py and calibration.py with appropriate measurements. The script imports necessary modules and functions from external sources. The main function loads the input data, performs calibration, calculates various values, and generates plots based on instructions provided in a separate text file. The generated plots show absorbed power vs emitted power, absorbed power vs reflectivity, and dissipated power vs wavelength. The script also calculates thermal resistance and displays it on the plot. To run the code, execute the main function.", + "code_snippet": "#! 
/usr/bin/python2.7\n# -*- coding: utf-8 -*-\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nimport errorvalues as ev # github.com/stefantkeller/errorvalues\n\nfrom VECSELsetup.eval.varycolor import varycolor\nfrom VECSELsetup.eval.gen_functions import load, extract, plotinstructions_write, plotinstructions_read, lut_from_calibfolder, lut_interp_from_calibfolder, thermal_resistance\n\n\ndef main():\n # before running this script:\n # run eval_spectrum.py to provide the .._eval.csv files required for the spectra\n # run calibration.py (with appropriate calib measurements)\n # and don't forget temperature_heatsink (this is not necessary for this script here, but it provides interesting insights for the measurement at hand)\n logfile = '../24_LL_ev/20150211_sample21-1-d6/spot333um.csv'\n calib_folder = '../24_LL_ev/20150204_calib_333um_s21-1-d6'\n \n \n #------------------------------------\n # calibration\n emis_lut = lut_from_calibfolder(calib_folder,identifiers=['Laser'],ignore_error=False) # emission has constant value solely due to BS, no ND in front of detector etc.\n pump_lut, refl_lut = lut_interp_from_calibfolder(calib_folder,identifiers=['Pump','Refl'])\n \n \n #------------------------------------\n # load measurement\n current_set, current, pump, refl, laser, spectra, meantemp = extract(logfile, identifiers=['Current','Pump','Refl','Laser','Spectra', 'Temperature'])\n Temperatures = sorted(current_set.keys()) # set temperatures (round numbers like 15.0 or 22.5 etc)\n T_out = dict((T,meantemp[T].round(1)) for T in Temperatures) # real temperatures for display in plot, including +-uncertainty\n\n\n #------------------------------------\n # calculate using calibration\n absorbed, reflected, emitted, pumped, dissipated = {}, {}, {}, {}, {}\n for T in Temperatures:\n reflected[T] = refl_lut(refl[T])\n pumped[T] = pump_lut(pump[T])\n absorbed[T] = pumped[T] - reflected[T]\n emitted[T] = emis_lut(laser[T])\n dissipated[T] = absorbed[T] - emitted[T]\n\n \n #\n #------------------------------------\n # invoke instructions for plot and fit\n # plotting the data can be tricky to reproduce, store the plot properties in a text file and read from there!\n # (easy to repeat the plot at a later time)\n # open the instruction file in a text editor, edit the instructions and run this module again; it will use the new instructions\n instrfile = logfile[:-4]+'_instr.csv'\n plotinstructions_write(instrfile,Temperatures,calib_folder)\n \n #------------------------------------\n # retrieve instructions\n instr = plotinstructions_read(instrfile)\n \n #\n #------------------------------------\n # translate instructions\n str2lst = lambda s: map(float,s[1:-1].split(','))\n\n textx = float(instr['textx']) # x coordinate for text; same for first two subplots (absorbed-emitted and absorbed-reflectivity)\n fontsize = float(instr['fontsize'])\n title = instr['title']\n xlim = str2lst(instr['xlim']) # range of x-axis; same for first two subplots\n ylim1 = str2lst(instr['ylim1']) # range of y-axis of first (aborbed-emitted) plot\n ylim2 = str2lst(instr['ylim2']) # range of second y-axis (absorbed-reflectivity)\n xlim3 = str2lst(instr['xlim3']) # third x-axis; (dissipated-wavelength)\n ylim3 = str2lst(instr['ylim3']) # 3rd y-axis\n plot_temps_for_3 = str2lst(instr['plot_temps_for_3']) # which ones to plot? 
you may have measured a heat sink temperature without lasing output, whose data will confuse the reader, so you don't plot it.\n textx3 = float(instr['textx3']) # x-coordinate of text in 3rd plot\n texty3 = str2lst(instr['texty3']) # 3rd y-coordinate\n llow0 = {}\n lhigh0 = {}\n texty1 = {}\n for T in Temperatures:\n llow0[T] = sum(absorbed[T].v()threshold, max_num : \n max_num = num \n\nprint(max_num)" + }, + { + "comment": "Write a Python program to find a substring in a string.", + "code_snippet": "def find_substring(source, substring):\n '''This function returns the start index of a substring in given string '''\n if substring not in source:\n return -1\n i=0\n while i < len(source):\n if source[i] == substring[0]:\n flag = True\n for j in range(len(substring)):\n if substring[j] != source[i+j]:\n flag = False\n break\n if flag:\n return i\n i += 1\n return -1" + }, + { + "comment": "Given a list of numbers, create a Python script to print all the elements greater than 50.", + "code_snippet": "nums = [2, 60, 5, 4, 78, 32, 99]\n\nfor num in nums:\n if num > 50:\n print(num)" + }, + { + "comment": "Create a program in Python to sort a given array in ascending order using bubble sort.", + "code_snippet": "def bubble_sort(arr): \n n = len(arr) \n for i in range(n-1):\n for j in range(0, n-i-1): \n if arr[j] > arr[j+1] : \n arr[j], arr[j+1] = arr[j+1], arr[j] \n\narr = [3,7,4,1]\nbubble_sort(arr) \nprint (\"Sorted array :\") \nfor i in range(len(arr)): \n print (\"%d\" %arr[i])," + }, + { + "comment": "Generate a function in Python that takes a number and prints its factors.", + "code_snippet": "def print_factors(x):\n \"\"\"\n This function takes a number and prints its factors\n \n :param x: The number whose factors to be printed \n :return: None\n \"\"\"\n \n print(\"The factors of\",x,\"are:\")\n for i in range(1, x + 1):\n if x % i == 0:\n print(i)\n \n##\n5. Instruction: Generate a basic HTML form using JavaScript to submit a user's city and the submit the details to the server.\n5. Input: Not applicable\n5. Output: \n
\n\n" + }, + { + "comment": "Write a python script to generate an array of random numbers between two specified values.", + "code_snippet": "import random\n\n# Generate an array of random numbers between two specified values\ndef generate_array(min, max):\n array = []\n \n for i in range(min, max):\n array.append(random.randint(min, max))\n \n return array\n \nmin = 0\nmax = 50\nprint(generate_array(min, max))" + }, + { + "comment": "Create a Python program to filter out words containing five characters or less from a list of words.", + "code_snippet": "# Function to filter out words containing five characters or less\ndef filterWords(arr):\n result = [word for word in arr if len(word) > 5]\n return result\n\n# Get the input\narr = [\"Hello\", \"Good\", \"Where\", \"Been\", \"World\", \"House\"]\n\n# Call the function\nresult = filterWords(arr)\n\n# Print the result\nprint(\"Filtered list of words:\", result)" + }, + { + "comment": "Create a Python function to identify the number of even numbers present in a given list of numbers.", + "code_snippet": "def countEvenNum(list):\n return len([x for x in list if x % 2 == 0])\n\nlist = [1, 2, 3, 4, 5]\nprint(countEvenNum(list))" + }, + { + "comment": "Write a Python function to generate a random string of length 8 containing alphanumeric characters.", + "code_snippet": "import string\nimport random\n\ndef generate_random_string(length):\n chars = string.ascii_letters + string.digits\n return ''.join(random.choice(chars) for _ in range(length))" + }, + { + "comment": "Write a Python program to solve a linear equation system of the form: ax + by = c.", + "code_snippet": "def solve_linear_equation_system(a, b, c):\n x = (c - b) / a\n y = (c - a*x) / b\n return x, y\n\nx, y = solve_linear_equation_system(a, b, c)\nprint(\"x = {}, y = {}\".format(x, y)) # x = 2.0, y = 2.0" + }, + { + "comment": "This code loads a JSON fixture file, extracts properties from it, creates Property objects from the properties, and saves them to the database. To use this code, you need to create a JSON fixture file with a \"properties\" key containing a list of property objects. Then, run the code with the appropriate command line arguments to specify the fixture file and folder. 
The saved objects will be printed to the console.", + "code_snippet": "#!/usr/bin/env python\n# coding: utf-8\n\nfrom __future__ import (\n print_function,\n unicode_literals,\n absolute_import\n)\n\nimport argparse\nimport json\nimport os\n\n\ndef get_path():\n return unicode(os.path.abspath('.'))\n\n\ndef parse_args():\n _parser = argparse.ArgumentParser()\n _parser.add_argument('--fixture', type=str, help='fixture file to load', default='properties.json')\n _parser.add_argument('--fixture_folder', type=str,\n default='models/fixtures',\n help='where fixtures are stored.'\n )\n return _parser.parse_args()\n\ndef main(base_path):\n properties_to_save = []\n args = parse_args()\n path = os.path.sep.join([base_path,\n 'app',\n args.fixture_folder,\n args.fixture])\n with open(path) as file_:\n data = json.load(file_)\n properties = data['properties']\n for property_ in properties:\n property_.pop('id')\n properties_to_save.append(Property(**property_))\n Property.objects.insert(properties_to_save)\n\n return len(properties_to_save)\n\n\nif __name__ == '__main__':\n from app.models.properties import Property\n base_path = get_path()\n out = main(base_path)\n print(\"{} objects saved\".format(out))" + }, + { + "comment": "Create an algorithm in Python to determine the most efficient order of tasks from an array of given tasks.", + "code_snippet": "def most_efficient_order(tasks):\n \"\"\"\n This method takes a list of tasks and produces the most efficient order for completing the tasks.\n\n Parameters\n ----------\n tasks: List\n A list of tasks to be performed.\n\n Returns\n -------\n List\n An ordered list of tasks representing the most efficient order for completing the tasks.\n \"\"\"\n tasks_to_complete = tasks.copy()\n order = [] \n while tasks_to_complete:\n min_time = float(\"inf\")\n task_index = None\n for i, task in enumerate(tasks_to_complete):\n if task.get_time() < min_time:\n min_time = task.get_time()\n task_index = i\n order.append(tasks_to_complete.pop(task_index))\n return order" + }, + { + "comment": "Optimize a given Python script that reverse a string.", + "code_snippet": "def reverse_string(string):\n return string[::-1]" + }, + { + "comment": "Create an HTML form with a submit button to send information to a Python program.", + "code_snippet": "
<form method=\"POST\">\n <input type=\"submit\" value=\"Submit\">\n</form>
" + }, + { + "comment": "Generate a Python script that contains a function with two parameters and returns the sum of the two parameters.", + "code_snippet": "def add(x, y): \n \"\"\"Returns the sum of x and y\"\"\"\n return x + y" + }, + { + "comment": "Create a Python program to read an array of integers and calculate their sum.", + "code_snippet": "def calculate_sum(arr):\n sum = 0\n\n for i in arr:\n sum += i\n\n return sum\n\ninput_array = [1, 3, 5, 7]\n\nresult = calculate_sum(input_array)\n\nprint(result) # prints 16" + }, + { + "comment": "Create a Python program that multiplies two input numbers and prints the result.", + "code_snippet": "def multiply(a, b):\n result = a * b\n print(result)\n\nmultiply(a, b)" + }, + { + "comment": "The code is a Python class called FileSystem that provides methods for saving, deleting, and finding task data. To use this class, import it and create an instance with a parameter dictionary that includes a 'taskDir' key specifying the directory where task data should be stored. Then call the methods on the instance as needed:\n\n- To save task data, call the save_task method with a dictionary containing the data. If the dictionary does not already include an 'id' key, one will be generated and added to the dictionary. The task data will be stored in a file in a subdirectory of the taskDir directory named after the task ID.\n- To delete task data, call the delete_task method with the ID of the task to delete. This will remove the subdirectory containing the task data.\n- To find task data by ID, call the find_by_id method with the ID of the task to find. This will return the task data as a dictionary.\n- To find task data for multiple IDs, call the find_by_ids method with a list of IDs. This will return a list of task data dictionaries.\n- To find all task data, call the all_task_data method. This will return a list of all task data dictionaries, sorted by ID in ascending order by default (or descending order if the 'order' parameter is set to 'desc').\n- To get task data for a specific ID without raising an error if the data is not found, call the task_data method with the ID of the task to find. 
This will return the task data as a dictionary, or None if the task data is not found.", + "code_snippet": "import os\nimport json\nimport logging\nimport fcntl\n\nfrom jsub.util import safe_mkdir\nfrom jsub.util import safe_rmdir\nfrom jsub.error import RepoReadError\nfrom jsub.error import TaskNotFoundError\n\nID_FILENAME = 'id'\n\nclass FileSystem(object):\n\tdef __init__(self, param):\n\t\tself.__jsub_dir = os.path.expanduser(param.get('taskDir', '~/jsub/'))\n#\t\tself.__id_file = os.path.join(self.__jsub_dir, ID_FILENAME)\n\n\t\tself.__logger = logging.getLogger('JSUB')\n\n#\t\tself.__create_repo_dir()\n\n\t\tself.__json_format = param.get('format', 'compact')\n\n\tdef save_task(self, data):\n\t\tif 'id' not in data:\n\t\t\tsafe_mkdir(self.__jsub_dir)\n\t\t\tdata['id'] = self.__new_task_id()\n\t\tsafe_mkdir(os.path.join(self.__jsub_dir,str(data['id']),'taskInfo'))\n\t\ttask_path = os.path.join(self.__jsub_dir, str(data['id']),'taskInfo','repo')\n\n\t\tdata_str = self.__json_str(data)\n\t\twith open(task_path, 'a+') as f:\n\t\t\tfcntl.flock(f, fcntl.LOCK_EX)\n\t\t\tf.seek(0)\n\t\t\tf.truncate()\n\t\t\tf.write(data_str)\n\n\tdef delete_task(self, task_id):\n\t\tsafe_rmdir(os.path.join(self.__jsub_dir,str(task_id)))\n\n\tdef find_by_id(self, task_id):\n\t\treturn self.task_data(task_id)\n\n\tdef find_by_ids(self, task_ids):\n\t\tall_data = []\n\t\tfor task_id in task_ids:\n\t\t\ttry:\n\t\t\t\ttd = self.task_data(task_id)\n\t\t\t\tall_data.append(td)\n\t\t\texcept RepoReadError as e:\n\t\t\t\tself.__logger.debug(e)\n\t\treturn all_data\n\n\tdef all_task_data(self, order='asc'):\n\t\ttask_ids =[d for d in os.listdir(self.__jsub_dir) if os.path.isdir(os.path.join(self.__jsub_dir,d))]\n\t\ttask_ids.sort(key=int, reverse=(order=='desc'))\n\t\treturn self.find_by_ids(task_ids)\n\n\tdef task_data(self, task_id):\n\t\tsafe_mkdir(os.path.join(self.__jsub_dir,str(task_id),'taskInfo'))\n\t\ttask_path = os.path.join(self.__jsub_dir,str(task_id),'taskInfo','repo')\n\t\twith open(task_path, 'a+') as f:\n\t\t\tfcntl.flock(f, fcntl.LOCK_EX)\n\t\t\tf.seek(0)\n\t\t\tdata_str = f.read()\n\n\t\ttry:\n\t\t\treturn json.loads(data_str)\n\t\texcept ValueError as e:\n\t\t\traise RepoReadError('JSON decode error on task %s: %s' % (task_id, e))\n\n#\tdef __create_repo_dir(self):\n#\t\tsafe_mkdir(self.__jsub_dir)\n\n\tdef __new_task_id(self):\n\t\ttask_ids =[int(d) for d in os.listdir(self.__jsub_dir) if os.path.isdir(os.path.join(self.__jsub_dir,d))]\n\t\tif not task_ids:\n\t\t\treturn 1\n\t\ttask_ids.sort(key=int, reverse=True)\n\t\treturn(task_ids[0]+1)\n\n\tdef __json_str(self, data):\n\t\tif self.__json_format == 'pretty':\n\t\t\treturn json.dumps(data, indent=2)\n\t\treturn json.dumps(data, separators=(',', ':'))\n" + }, + { + "comment": "Create a visualization in Python using Matplotlib that displays the top 5 most popular programming languages.", + "code_snippet": "import matplotlib.pyplot as plt\n\nx_values = [\"Python\", \"Java\", \"C++\", \"Javascript\", \"C#\"]\ny_values = [45, 25, 15, 10, 5]\n\nplt.bar(x_values, y_values)\n\nplt.title(\"Popular Programming Languages\")\nplt.ylabel(\"Percentage of Use\")\nplt.xlabel(\"Languages\")\n\nplt.show()" + }, + { + "comment": "Generate a single Python line to create a list that contains elements from another list.", + "code_snippet": "new_list = [element for element in given_list]" + }, + { + "comment": "Write a Python function to find the maximum of three numbers.", + "code_snippet": "def max_of_three(a, b, c):\n if a > b and a > c:\n return a\n elif b > a and 
b > c:\n return b\n else:\n return c\n\nresult = max_of_three(3, 4, 5)\nprint(result)" + }, + { + "comment": "Write a Python program to scrape the table at the following URL and extract the planets information (name, mass, and distance from the sun).", "code_snippet": "import requests\nfrom bs4 import BeautifulSoup\n\npage = requests.get('https://en.wikipedia.org/wiki/List_of_gravitationally_rounded_objects_of_the_Solar_System')\nsoup = BeautifulSoup(page.content, 'lxml')\n\nplanets_table = soup.find('table', class_='wikitable sortable')\n\nplanets = []\nfor row in planets_table.find_all('tr'):\n if row.find('th'):\n continue\n cells = row.find_all('td')\n planets.append({\n 'name': cells[0].text,\n 'mass': cells[2].text,\n 'distance_from_sun': cells[5].text\n })\n\nprint(planets)" + }, + { + "comment": "Create a Python dictionary to store data about a student.", "code_snippet": "student_data = {\n 'name': 'John Smith',\n 'year': 'Freshman',\n 'gpa': 3.25,\n 'courses': ['math', 'english', 'Computer Science', 'Statistics'],\n 'extracurricular': ['Chess Club', 'Student Government', 'Debate Team']\n}" + }, + { + "comment": "Write a function in Python to remove duplicate entries from a list.", "code_snippet": "def removeDuplicates(input): \n final_list = [] \n for num in input: \n if num not in final_list: \n final_list.append(num) \n return final_list \n\nlst = [1,2,2,3,3,4]\nprint(removeDuplicates(lst))" + }, + { + "comment": "Given the following array of numbers, write a Python program to find the maximum element.", "code_snippet": "# max() is a Python built-in function, so no import is required\n\n#initialize array of numbers\nnumbers = [1, 43, 78, 3, 92, 42]\n\n#find maximum element\nprint(max(numbers))\n\n#output\n92" + }, + { + "comment": "Create a Python program to compress a given data sequence.", "code_snippet": "def compress_sequence(nums):\n output = []\n temp = [nums[0]]\n\n for i in range(1, len(nums)):\n if nums[i-1] * nums[i] >= 0:\n # same sign as the previous value: extend the current group\n temp.append(nums[i])\n else:\n # sign change: close the current group and start a new one\n output.append(temp)\n temp = [nums[i]]\n output.append(temp)\n\n return output\n\nprint(compress_sequence([9, -2, 6, 0, -7]))" + }, + { + "comment": "Write a function in python to split an given array into two parts with equal sum.", "code_snippet": "def split_equal_sum(arr):\n n = len(arr) \n leftsum = 0\n rightsum = sum(arr)\n \n for i in range(n): \n rightsum -= arr[i]\n if leftsum == rightsum:\n return True \n leftsum += arr[i] \n return False" + }, + { + "comment": "Create a for loop in Python to calculate the total sum of all elements in the given list.", "code_snippet": "total = 0\nfor element in data:\n total = total + element\nprint(total)" + }, + { + "comment": "Suppose you want to build an autocomplete feature for a text editor, develop a Python program to generate a list of suggestions based on a given prefix.", "code_snippet": "def auto_complete(dictionary, prefix):\n # list for words with the given prefix\n prefix_words = []\n\n # iterate through the dictionary\n for word in dictionary:\n # if the word starts with given prefix, \n # add it to the list\n if word.startswith(prefix):\n prefix_words.append(word)\n\n return prefix_words\n\n\ndictionary = [\"dog\", \"deer\", \"deal\", \"dungeon\", \"dark\", \"dusk\"]\nprefix = \"d\"\n\nprefix_words = auto_complete(dictionary, prefix)\nprint(prefix_words)\n\n# Output:\n# ['dog', 'deer', 'deal', 'dungeon', 'dark', 'dusk']" + }, + { + "comment": "Design a neural network in Python to predict a customer\u2019s annual
income based on their age and education level.", + "code_snippet": "import numpy as np\nimport keras \nfrom keras.layers import Dense\nfrom keras.models import Sequential\n\n# Build model\nmodel = Sequential()\nmodel.add(Dense(64, input_dim = 2, activation = 'relu'))\nmodel.add(Dense( 1))\n\n# Compile the model\nmodel.compile(loss = 'mean_squared_error',\n optimizer = 'adam')\n\n# Obtain some data - age (years), education level (high school, college, university)\nx_train = np.array([[25, 0], [30, 0], [35, 0], \n\t\t [40, 0], [45, 1], [45, 2],\n\t\t [50, 1], [50, 2], [55, 1],\n\t\t [55, 2], [50, 3], [60, 3]])\n\n# Labels - annual income\ny_train = [15000, 20000, 25000, 30000,\n 40000, 45000, 50000, 60000, \n\t 65000, 70000, 80000, 90000]\n\n# Fit the model\nmodel.fit(x_train, y_train, epochs = 150, batch_size = 2)\n\n# Calculate the predicted annual income\nage = 50\neducation = 3\nannualIncome = model.predict(np.array([[age, education]]))\nprint(annualIncome)\n# output: [[80000]]" + }, + { + "comment": "Create a python program to classify text documents into two classes: positive and negative.", + "code_snippet": "import nltk\nfrom nltk.classify import NaiveBayesClassifier \n \ndef label_words(words): \n return dict([(word, True) for word in words]) \n\npositive_tokens = nltk.word_tokenize(positiveText) \nnegative_tokens = nltk.word_tokenize(negativeText)\n\npositive_data = [(label_words(positive_tokens), 'positive')] \nnegative_data = [(label_words(negative_tokens), 'negative')]\n \ntrain_data = positive_data + negative_data \nclassifier = NaiveBayesClassifier.train(train_data) \n \n# classify \nclassifier.classify( \n\tlabel_words(nltk.word_tokenize(\"This movie was ok\")))" + }, + { + "comment": "Design a Python program to filter out the odd numbers from a list.", + "code_snippet": "def filter_odd_numbers(numbers):\n filtered_list = []\n for num in numbers:\n if num % 2 != 0:\n filtered_list.append(num)\n return filtered_list\n\nlist1 = [1,2,3,4,5,6]\nfiltered_list = filter_odd_numbers(list1)\nprint(filtered_list) # [1, 3, 5]" + }, + { + "comment": "Write a Python script to refresh a given table of data in a PostgreSQL database every 5 minutes.", + "code_snippet": "import psycopg2\nfrom datetime import datetime, timedelta\nimport schedule\nimport time\n\nconn = psycopg2.connect(\n host=\"localhost\", \n database=\"db\", \n user=\"user\", \n password=\"password\"\n)\n\ndef refresh_data():\n cur = conn.cursor()\n query = 'REFRESH TABLE \"TableName\";'\n cur.execute(query)\n conn.commit()\n\n# run the refresh_data job every 5 minutes\nschedule.every(5).minutes.do(refresh_data)\n\nwhile True:\n schedule.run_pending()\n time.sleep(1)" + }, + { + "comment": "Make the following Python code snippet more efficient.", + "code_snippet": "def determine_commission_rate(gross_sales):\n if gross_sales >= 10000:\n return 0.1\n elif gross_sales >= 5000:\n return 0.07\n else:\n return 0.05" + }, + { + "comment": "Construct a Python program that computes the area of a circle with the given radius.", + "code_snippet": "import math\n\ndef area_circle(radius):\n return math.pi * (radius ** 2)\n\narea = area_circle(radius)\nprint('The area of the circle is %.2f' % area)" + }, + { + "comment": "Implement a basic version control system in Python that keeps track of changes in the code.", + "code_snippet": "class VersionControl:\n def __init__(self, lines):\n self.lines = lines\n self.mark = 0\n \n def commit(self):\n self.mark += 1\n log_file = open(\"log_file.txt\", \"a+\")\n log_file.write(str(self.mark) + 
\": \" + str(self.lines))\n log_file.close()\n \n def restore(self):\n log_file = open(\"log_file.txt\", \"r\")\n lines = log_file.readlines()\n to_mark = self.mark-1\n \n self.lines = lines[to_mark]\n log_file.close()" + }, + { + "comment": "Write a Python program to solve the Towers of Hanoi game.", + "code_snippet": "def hanoi(n, source, spare, target):\n if n == 1:\n disk = source.pop(0)\n print(\"Moving disk {0} from {1} to {2}\".format(disk[0], disk[1], target[1]))\n target.insert(0, disk)\n else:\n hanoi(n-1, source, target, spare)\n disk = source.pop(0)\n print(\"Moving disk {0} from {1} to {2}\".format(disk[0], disk[1], target[1]))\n target.insert(0, disk)\n hanoi(n-1, spare, source, target)\n\nn = 8\nsource = [(i, \"A\") for i in range(1, n+1)]\ntarget = []\nspare = []\n\nhanoi(n, source, spare, target)" + }, + { + "comment": "Analyze a dataset of customers and create a classification model in scikit-learn to determine customer churn.", + "code_snippet": "# Python code\nimport pandas as pd\nimport numpy as np\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\n\n# Load the data set\ndata = pd.read_csv(\"customer_data.csv\")\n\n# Select features\nX = data.drop(['churn'], axis=1)\n\n# Select targets\ny = data['churn']\n\n# Split the data into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n\n# Create the model\nmodel = RandomForestClassifier()\n\n# Train the model\nmodel.fit(X_train, y_train)\n\n# Make predictions and measure accuracy\npredictions = model.predict(X_test)\nprint(model.score(X_test, y_test))" + }, + { + "comment": "Create a Django application with a model that stores the name, address, and email of a user.", + "code_snippet": "# create Django app\ndjango-admin startproject myapp\n\n# create user model\nfrom django.db import models\n\nclass User(models.Model): \n name = models.CharField(max_length=100) \n address = models.CharField(max_length=100)\n email = models.EmailField(max_length=100)\n\n# migrate models\npython manage.py makemigrations\npython manage.py migrate" + }, + { + "comment": "Create a function in Python to convert a given number into its Roman form.", + "code_snippet": "def int_to_Roman(number): \n \n val = [ \n 1000, 900, 500, 400, \n 100, 90, 50, 40,\n 10, 9, 5, 4,\n 1\n ] \n syb = [ \n \"M\", \"CM\", \"D\", \"CD\", \n \"C\", \"XC\", \"L\", \"XL\", \n \"X\", \"IX\", \"V\", \"IV\",\n \"I\"\n ] \n roman_num = '' \n i = 0\n while number > 0:\n for_val = number // val[i]\n roman_num += syb[i] * for_val\n number -= val[i] * for_val\n i += 1\n return roman_num" + }, + { + "comment": "Create a linked-list in Python that can traverse in both directions.", + "code_snippet": "class Node:\n def __init__(self, data):\n self.data = data\n self.next = None\n self.prev = None\n\n\nclass DoubleLinkedList:\n def __init__(self): \n self.head = None\n \n def append(self, new_data): \n new_node = Node(new_data) \n if self.head is None:\n self.head = new_node\n else: \n curr_node = self.head\n while curr_node.next != None:\n curr_node = curr_node.next\n curr_node.next = new_node\n new_node.prev = curr_node\n \n def prepend(self, new_data):\n new_node = Node(new_data) \n if self.head is None: \n self.head = new_node\n return\n new_node.next = self.head\n self.head.prev = new_node \n self.head = new_node\n\n def reverse(self):\n curr_node = self.head\n while curr_node:\n temp = curr_node.next\n curr_node.next = curr_node.prev\n curr_node.prev = 
temp\n curr_node = curr_node.prev\n if temp:\n self.head = temp.prev" + }, + { + "comment": "Program a python code to convert Celsius to Fahrenheit.", + "code_snippet": "def celsiusToFahrenheit(temp):\n return round((temp * 9/5) + 32, 2)" + }, + { + "comment": "Create a machine learning algorithm in Python to categorize a dataset of records.", + "code_snippet": "import numpy as np\nfrom sklearn.svm import SVC \n\n# Dataset \ntraining_data = [\n {\"label\": \"apple\", \"features\": [1, 1, 0]}, \n {\"label\": \"banana\", \"features\": [0, 1, 1]}, \n {\"label\": \"lemon\", \"features\": [0, 0, 1]}, \n {\"label\": \"orange\", \"features\": [1, 0, 0]}, \n {\"label\": \"pear\", \"features\": [0, 1, 0]}, \n]\n\n# Feature extraction \nX = []\ny = []\nfor data in training_data:\n X.append(data['features'])\n y.append(data['label'])\n\n# Model \nclf = SVC()\nclf.fit(X, y) \n\n# Prediction \npredict_data = [1, 0, 1]\npredicted_label = clf.predict([predict_data])\n\nprint(predicted_label[0])" + }, + { + "comment": "Create an application in Python for encryption and decryption of strings. The application should take a string as input, and use a cipher text algorithm of choice to encrypt the string.", + "code_snippet": "from Crypto.Cipher import AES\n\nkey = b'Sixteen byte key'\n\ndef encrypt(plaintext):\n cipher = AES.new(key, AES.MODE_ECB)\n return cipher.encrypt(plaintext)\n\ndef decrypt(ciphertext):\n cipher = AES.new(key, AES.MODE_ECB)\n return cipher.decrypt(ciphertext)" + }, + { + "comment": "Suggest a valid Python syntax to calculate the sum of two variables using lambda", + "code_snippet": "sum = lambda a, b : a + b\nprint(sum(3,4))" + }, + { + "comment": "Generate code for a Machine Learning model in Python which can recognize handwritten digits from 0 to 9", + "code_snippet": "# Importing necessary libraries\n\nimport numpy as np\nimport pandas as pd\nfrom tensorflow.keras.datasets import mnist\nfrom tensorflow.keras.layers import Dense, Input, Dropout\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.utils import to_categorical\n\n# Loading data\n(X_train, y_train), (X_test, y_test) = mnist.load_data()\n\n# Defining the neural network model\n\n# Input layer\ninputs = Input(shape=(28,28,))\n\n# Hidden layer\nh1 = Dense(64, activation='relu')(inputs)\nh2 = Dropout(0.5)(h1)\n\n# Output layer\noutputs = Dense(10, activation='softmax')(h2)\n\n# Compiling the model\nmodel = Model(inputs=inputs, outputs=outputs)\nmodel.compile(loss='categorical_crossentropy',\n optimizer='adam',\n metrics=['accuracy'])\n\n# Data pre-processing\n\n# Reshape inputs from (60000, 28, 28) to (60000, 784)\nX_train = X_train.reshape(60000, 784)\nX_test = X_test.reshape(10000, 784)\n\n# Normalize pixel values\nX_train = X_train.astype('float32')\nX_train /= 255\nX_test = X_test.astype('float32')\nX_test /= 255\n\n# One-hot encode labels\ny_train = to_categorical(y_train, num_classes=10)\ny_test = to_categorical(y_test, num_classes=10)\n\n# Fitting the model\nmodel.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=5, batch_size=128)" + }, + { + "comment": "Explain the difference between a ``list`` and a ``tuple`` in Python.", + "code_snippet": "The difference between a list and a tuple in Python is that a list is mutable, meaning its contents can be changed, while a tuple is immutable, meaning its contents cannot be changed. A list has square brackets around it, while a tuple has parentheses around it. 
Lists are used when the data needs to be changed, while tuples are used when the data is fixed and cannot be changed." + }, + { + "comment": "Generate a Python program to calculate the Fibonacci number at a given position.", "code_snippet": "def fibonacci(n):\n if n == 0:\n return 0\n elif n == 1:\n return 1\n else:\n return fibonacci(n-1) + fibonacci(n-2)\n\nn = 10\nprint(fibonacci(n))" + }, + { + "comment": "Create a program in Python to generate a Random Password of length 8 characters, which contains at least one of each character type (lowercase, uppercase, digits, special characters).", "code_snippet": "import string\nimport random\n\n# length of password\nlength = 8\n\n# special characters\nspecial_characters = \"!#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\"\n\n# generating password\npassword = random.choice(string.ascii_lowercase)\npassword += random.choice(string.ascii_uppercase)\npassword += random.choice(string.digits)\npassword += random.choice(special_characters)\n\n# four characters are already chosen, so fill up to the requested length\nfor i in range(length - 4):\n password += random.choice(string.ascii_lowercase + string.ascii_uppercase + string.digits + special_characters)\n\npassword = ''.join(random.sample(password,len(password)))\n\nprint (\"Password is: \", password)" + }, + { + "comment": "Write a Python program to extract all the words starting with a specific letter", "code_snippet": "\"\"\"\nExtract all the words starting with a specific letter\n\"\"\"\n\ndef extract_words(word, letter):\n words_list = []\n for w in word.split():\n if w[0] == letter:\n words_list.append(w)\n return words_list\n\nif __name__ == '__main__':\n words = 'This is a sample sentence to test the program'\n letter = 's'\n print(extract_words(words, letter))" + }, + { + "comment": "Implement a hash table in Python that supports insert, delete and retrieve operations.", "code_snippet": "class HashTable:\n def __init__(self):\n self.table = [[] for _ in range(256)]\n \n def insert(self, key, value):\n h = hash(key) % 256\n for i, kv in enumerate(self.table[h]):\n if kv[0] == key:\n self.table[h][i] = (key, value)\n return\n self.table[h].append((key, value))\n \n def retrieve(self, key):\n h = hash(key) % 256\n for kv in self.table[h]:\n if kv[0] == key:\n return kv[1]\n \n def delete(self, key):\n h = hash(key) % 256\n for i, kv in enumerate(self.table[h]):\n if kv[0] == key:\n del self.table[h][i]" + }, + { + "comment": "Create a search engine in Python to quickly search through a given array.", "code_snippet": "# Define the array\narray = ['school', 'hospital', 'bookstore', 'mall', 'theatre']\n\n# Function to perform linear search\ndef linearSearch(search_item):\n # Loop through array\n for i in range(len(array)):\n # If search item is found, return its position\n if array[i] == search_item:\n return i\n # If item is not found, return -1\n return -1\n\n# Input search item\ns = input(\"Enter the item to be searched: \")\n\n# Search the item\nresult = linearSearch(s)\n\n# Print result\nif result == -1:\n print(\"Item not found.\")\nelse:\n print(\"Item is found at position \"+str(result))" + }, + { + "comment": "Create a function in Python that prints out the largest value from a given list of numbers.", "code_snippet": "def get_max_value(numbers): \n return max(numbers)\n\nprint(get_max_value(list)) # 6" + }, + { + "comment": "Write a python script that takes a list of numbers from 1 to 8 and prints its cubic values.", "code_snippet": "list_numbers = [1, 2, 3, 4, 5, 6, 7, 8]\n\nfor num in list_numbers:\n print(num**3)" + }, + { + "comment": "Create a dictionary in
Python with key-value pairs", + "code_snippet": "my_dict = {\n \"FirstName\": \"John\"\n}" + }, + { + "comment": "Write a Python code to find the number of unique paths from a given source to a destination.", + "code_snippet": "def num_unique_paths(m, n):\n dp = [[0 for x in range(m)] for y in range(n)] \n \n # Count of paths to reach any\n # cell in first column is 1 \n for i in range(n): \n dp[i][0] = 1\n \n # Count of paths to reach any\n # cell in first column is 1 \n for j in range(m): \n dp[0][j] = 1\n \n # Calculate count of paths for other \n # cells in bottom-up manner using\n # the recursive solution \n for i in range(1, n): \n for j in range(1, m): \n \n # By uncommenting the last\n # part the code calculatest he total\n # possible paths if the diagonal \n # Movements are allowed \n dp[i][j] = dp[i-1][j] + dp[i][j-1]# + dp[i-1][j-1]\n \n return dp[n-1][m-1]" + }, + { + "comment": "Generate a Python program to display a Fibonacci sequence of first n numbers.", + "code_snippet": "def Fibonacci(n):\n if n==1:\n return 0\n elif n==2:\n return 1\n else:\n return Fibonacci(n-1)+Fibonacci(n-2)\n\nn = 10\nfor i in range(1, n):\n print(Fibonacci(i))" + }, + { + "comment": "This code is a Python script that provides a command-line interface to download subtitles for movies and TV shows. To use it, run the script and provide one or more release paths as arguments. You can also specify the language and subtitle site to use. The available commands are \"download\", \"exist\", \"test\", \"list\", \"help\", and \"version\".", + "code_snippet": "#!/usr/bin/env python\n#-*- coding:utf-8 -*-\n########################################################\n# Name: Subtitles Client\n# Site: http://RealGame.co.il\n__author__ = 'RealGame (Tomer Zait)'\n__license__ = 'GPL v3'\n__version__ = '1.0'\n__email__ = 'realgam3@gmail.com'\n########################################################\n\nfrom os import path\nfrom sys import argv\nfrom docopt import docopt\n\nfrom engines.engine import SubtitleSite, SUBTITLE_SITE_LIST, DEFAULTS\n\n\n__doc__ = \\\n\"\"\"\nSubtitles Client\n\nUsage:\n {prog} download ... [--lang= --engine=...]\n {prog} exist ... 
[--lang= --engine=...]\n {prog} test [...]\n {prog} (-l | --list)\n {prog} (-h | --help)\n {prog} (-v | --version)\n\nOptions:\n -l --list Show subtitles engine list.\n -h --help Show this screen.\n -v --version Show version.\n --lang= Subtitle language (alpha2) [default: {def_language}].\n --engine= Subtitle site [default: {def_engine}].\n\"\"\".format(prog=path.basename(argv[0]),\n def_language=DEFAULTS['subtitle_language'],\n def_engine=DEFAULTS['subtitle_engine'])\n\n\ndef download_subtitles(releases, engines=[DEFAULTS['subtitle_engine']], lang=DEFAULTS['subtitle_language']):\n if releases:\n for release in releases:\n for engine in engines:\n subtitle_release = SubtitleSite.get_file_properties(release)['release_name']\n print \"[{engine: ^15}] Trying To Download Subtitles For: '{release}'\".format(engine=engine,\n release=subtitle_release)\n sub_obj = SubtitleSite.class_factory(engine)\n subtitle_path = sub_obj.download_subtitle(release, lang)\n if subtitle_path:\n print \"{0:17} Download Success: ({file_path}).\\n\".format(\"\", file_path=subtitle_path)\n else:\n print \"{0:17} Subtitles Not Found.\\n\".format(\"\")\n\n\ndef is_subtitles_exist(releases, engines=[DEFAULTS['subtitle_engine']], lang=DEFAULTS['subtitle_language']):\n if releases:\n for release in releases:\n for engine in engines:\n subtitle_release = SubtitleSite.get_file_properties(release)['release_name']\n\n sub_obj = SubtitleSite.class_factory(engine)\n exist_flag = sub_obj.is_subtitle_exist(release, lang)\n res = \"Exist\"\n if not exist_flag:\n res = \"Does Not \" + res\n print \"[{engine: ^15}] '{release}' - {res}.\".format(engine=engine,\n release=subtitle_release,\n res=res)\n\n\ndef test_engines(engines):\n if not engines:\n engines = SUBTITLE_SITE_LIST.keys()\n\n for engine_key in engines:\n t = SubtitleSite.class_factory(engine_key)\n t.test_engine()\n\n\ndef main():\n args = docopt(__doc__, help=True, version='Subtitles Client %s' % __version__)\n\n if args['download']:\n download_subtitles(args[''], args['--engine'], args['--lang'])\n elif args['exist']:\n is_subtitles_exist(args[''], args['--engine'], args['--lang'])\n elif args['test']:\n test_engines(args[''])\n elif args['--list']:\n for sub_site in SUBTITLE_SITE_LIST.keys():\n sub_dict = SUBTITLE_SITE_LIST.get(sub_site)\n print sub_dict.get('class_name')\n\n\nif __name__ == \"__main__\":\n main()\n" + }, + { + "comment": "Optimize the following Python program to find the maximum number in a list.", + "code_snippet": "def max_num(list):\n max_num = list[0]\n\n for num in list:\n if num > max_num:\n max_num = num\n \n return max_num" + }, + { + "comment": "Develop an algorithm in Python that takes a list of strings as an input and sorts them alphabetically.", + "code_snippet": "def sortAlphabetically(strings): \n strings.sort() \n return strings \n\n#Test \nstrings = [\"hello\", \"world\", \"good\", \"morning\", \"programming\"]\nsorted_strings = sortAlphabetically(strings) \nprint(*sorted_strings)" + }, + { + "comment": "Create a Python program to print a formatted table of grades for a given set of students.", + "code_snippet": "def print_grade_table(students):\n max_name_length = max(len(student['name']) for student in students)\n line_template = \"{{name:<{max_name_length}}} {{grade}}\".format(max_name_length=max_name_length)\n header = line_template.format(name='Name', grade='Grade')\n sep = '-' * len(header)\n print(header)\n print(sep)\n for student in students:\n print(line_template.format(**student))" + }, + { + "comment": "Design a Python code 
snippet that deletes all duplicates in a given list of numbers.", + "code_snippet": "def deleteDuplicates(list): \n final_list = [] \n for num in list: \n if num not in final_list: \n final_list.append(num) \n return final_list" + }, + { + "comment": "Write a Python program to visually summarize the Twitter data.", + "code_snippet": "import matplotlib.pyplot as plt\n\n# Process the Twitter data to extract\n# the relevant data fields\n# ...\n\n# Plot the tweet authors\nplt.xlabel('Tweet Authors')\nplt.ylabel('Number of tweets')\nplt.title('Tweet Author Summary')\nplt.bar(authors, num_tweets)\nplt.show()\n\n# Plot the number of likes\nplt.xlabel('Tweets')\nplt.ylabel('Number of Likes')\nplt.title('Number of Likes Summary')\nplt.bar(tweets, num_likes)\nplt.show()\n\n# Plot the number of retweets\nplt.xlabel('Tweets')\nplt.ylabel('Number of Retweets')\nplt.title('Number of Retweets Summary')\nplt.bar(tweets, num_retweets)\nplt.show()" + }, + { + "comment": "Develop a function in Python that takes in a string of sentences and removes all the punctuations.", + "code_snippet": "import string\n\ndef remove_punctuations(s):\n return s.translate(str.maketrans('', '', string.punctuation))\n\nif __name__ == '__main__':\n sentence = \"This is a sample string!\"\n print(remove_punctuations(sentence))" + }, + { + "comment": "Create a model in Python using Linear Regression that can predict the housing prices given the living area and number of rooms.", + "code_snippet": "import numpy as np\nfrom sklearn import linear_model\n\nX = np.array([[2000, 3], [1600, 2.5], [2400, 3.5], [1400, 2], [3000, 4]])\ny = np.array([75000, 43000, 59000, 32000, 85000])\n\nmodel = linear_model.LinearRegression()\nmodel.fit(X, y)\n\nprediction = model.predict([[2200, 3]])" + }, + { + "comment": "Create a loop in Python that prints each item of the following list with a message:", + "code_snippet": "my_list = ['A', 'B', 'C', 'D']\n\nfor item in my_list:\n print(\"The item is: {}\".format(item))" + }, + { + "comment": "Develop a regular expression in Python to match all numeric strings.", + "code_snippet": "import re \n \n# Regular expression to match all numeric strings \npattern = '^[0-9]+$'\n \n# Input string \nstring = '123456789'\n \n# Matching a pattern \nresult = re.match(pattern, string) \nif result: \n print(\"String is a valid numeric string\") \nelse: \n print(\"String isn't a valid numeric string\")" + }, + { + "comment": "Create a BPMN diagram for FOMDES process 1 using Python and the bpmn_pgv library. The diagram should include clusters for Secretar\u00eda Ejecutiva, Presidencia, and Presupuesto/Administraci\u00f3n, with specific tasks and edges defined within each cluster. Additionally, include nodes for SIGEFOMDES Administraci\u00f3n and SISAC, and connect them to the appropriate clusters using dashed edges. 
Finally, save the diagram as a PNG file named \"proc4.png\" and a DOT file named \"proc4.dot\".", + "code_snippet": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n\n\"\"\"\nBPMN diagram for FOMDES process 1\n\"\"\"\n\nfrom __future__ import absolute_import\nfrom __future__ import division\nfrom __future__ import print_function\nfrom __future__ import unicode_literals\n\nfrom bpmn_pgv import *\nimport pygraphviz as pgv\n\n__author__ = 'mapologo'\n\nPROCESS_LABEL = \"Liquidaci\u00f3n de Cr\u00e9ditos\"\n# A graph for FOMDES processes\nF = pgv.AGraph(strict=False, directed=True)\n\nF.graph_attr.update(label=\"\", rankdir=\"TB\", splines=\"ortho\", labelloc=\"b\",\n size=\"8, 7.5\", forcelabels=\"true\", ranksep=\"0.25\", fontname=\"Liberation Sans Narrow Condensed\")\nF.node_attr.update(fontname=\"Liberation Sans Narrow Condensed\")\nF.edge_attr.update(fontname=\"Liberation Sans Narrow Condensed\", fontsize=\"10\")\n\nse_cluster = {\"se7\": (\"Recibir el documento protocolizado\", \"start\"),\n \"se8\": (\"Revisar el documento protocolizado\", \"human\"),\n \"se9\": (\"\", \"end\")}\n\nse_edges = {\"se7\": {\"se8\": {}},\n \"se8\": {\"se9\": {\"style\": \"invis\"}}}\n\nSE = add_cluster(F, \"se\", \"Secretar\u00eda Ejecutiva\", se_cluster, se_edges)\n\np_cluster = {\"p1\": (\"Firma del cheque\", \"human\"),\n \"p2\": (\"Entregar cheque a beneficiario\", \"message\")}\n\np_edges = {\"p1\":{\"p2\": {}}}\n\nP = add_cluster(F, \"p\", \"Presidencia\", p_cluster, p_edges)\n\npr_cluster = {\"pr2\": (\"Verificar documentaci\u00f3n legal y elaborar certificaci\u00f3n de disponibilidad\", \"human\"),\n \"pr3\": (\"Crear las cuentas por cobrar\", \"human\"),\n \"pr4\": (\"Generar tablas de amortizaci\u00f3n\", \"human\"),\n \"pr5\": (\"Imprimir y firmar orden de liquidaci\u00f3n y cheque\", \"human\")}\n\npr_edges = {\"pr2\": {\"pr3\": {}},\n \"pr3\": {\"pr4\": {}},\n \"pr4\": {\"pr5\": {}}}\n\nPR = add_cluster(F, \"pr\", \"Presupuesto/Administraci\u00f3n\", pr_cluster, pr_edges)\n\nF.add_node(\"SIGEFOMDES Administraci\u00f3n\", image=IMAGE_PATH + \"database.png\", shape=\"plaintext\", label=\"\", xlabel=\"SIGEFOMDES Administraci\u00f3n\")\nF.add_node(\"SISAC\", image=IMAGE_PATH + \"database.png\", shape=\"plaintext\", label=\"\", xlabel=\"SISAC\")\n\nglobal_edges = {\"Beneficiario\": {\"se7\": {\"style\": \"dashed\"}},\n \"se8\": {\"pr2\": {\"style\": \"dashed\"}},\n \"pr3\": {\"SIGEFOMDES Administraci\u00f3n\": {\"style\": \"dashed\"}},\n \"pr4\": {\"SISAC\": {\"style\": \"dashed\"}},\n \"pr5\": {\"p1\": {\"style\": \"dashed\"}},\n \"p2\": {\"se9\": {\"style\": \"dashed\"}, \"Beneficiario\": {\"style\": \"dashed\"}}}\n\nadd_edges(F, global_edges)\n\nF.draw(\"proc4.png\", prog='dot')\nF.write(\"proc4.dot\")\n" + }, + { + "comment": "Write a python function to calculate an exponential series", + "code_snippet": "def exponential_series(x,n):\n sum = 0\n for i in range(n+1):\n sum += x ** i\n return sum" + }, + { + "comment": "Design an algorithm in Python to add two large numbers represented as strings.", + "code_snippet": "def add_large_numbers(num1, num2):\n num1 = num1[::-1]\n num2 = num2[::-1] \n carry = 0\n result = \"\" \n \n # Iterate over the length of the length of larger string \n for i in range(max(len(num1), len(num2))): \n if i < len(num1):\n digit1 = int(num1[i])\n else:\n digit1 = 0\n\n if i < len(num2):\n digit2 = int(num2[i])\n else:\n digit2 = 0\n \n sum_total = digit1 + digit2 + carry \n carry = sum_total // 10 \n\n result += str(sum_total % 10) \n\n # Add remaining carry \n if carry 
> 0: \n result += str(carry) \n\n return result[::-1]\n\nprint(add_large_numbers(\"111\", \"1337\"))\n# Output: 1448" + }, + { + "comment": "Design an algorithm in Python, that takes an array and sort it in ascending order.", + "code_snippet": "def sort_ascending(array):\n array.sort()\n return array\n\nsort_ascending([10, 3, 9, 1, 15, 2]) # [1, 2, 3, 9, 10, 15]" + }, + { + "comment": "Complete the Python class below so that it can calculate the sum of two integers.", + "code_snippet": "class Sum:\n def __init__(self, x, y):\n self.x = x\n self.y = y\n \n def add(self):\n return self.x + self.y" + }, + { + "comment": "Suggest a completion for the following Python code to check if a given number is an Armstrong number.", + "code_snippet": "def is_armstrong_number(num):\n order = len(str(num))\n sum = 0\n temp = num\n while temp > 0:\n digit = temp % 10\n sum += digit ** order\n temp //= 10\n if num == sum:\n return True \n else:\n return False" + }, + { + "comment": "Write an algorithm in Python that finds the maximum sum of an array of non-negative integers, given an upper limit value.", + "code_snippet": "def findMaxSum(limit, array):\n maxSum = 0\n for i in range(len(array)):\n currentSum = 0\n for j in range(i, len(array)):\n currentSum += array[j]\n if currentSum <= limit and currentSum > maxSum:\n maxSum = currentSum\n return maxSum" + }, + { + "comment": "Write a Python program to generate n-grams of the given text with a window size of 3.", + "code_snippet": "from collections import Counter\n\n\ndef generate_ngrams(text, window_size):\n text = text.lower().split(' ')\n ngrams = []\n for n in range(window_size, len(text)+1):\n for i in range(len(text)-n+1):\n ngrams.append(' '.join(text[i:i+n]))\n return Counter(ngrams)\n\n\ntext = \"The quick brown fox jumps over the lazy dog\"\nwindow_size = 3\n\nngrams_count = generate_ngrams(text, window_size)\n\nprint('\\nNGRAMS COUNT:',ngrams_count)\n\n# Output\n# NGRAMS COUNT: Counter({'the quick brown': 3,\n# 'quick brown fox': 2,\n# 'over the lazy': 1,\n# 'brown fox jumps': 1,\n# 'fox jumps over': 1,\n# 'the lazy dog': 1,\n# 'jumps over the': 1,\n# 'quick brown fox jumps': 1})" + }, + { + "comment": "Output the Fibonacci sequence as a list of integers in Python", + "code_snippet": "def fibonacci_sequence(n): \n # Creating list of length n \n fib_list = [0]*n \n \n # first and second terms \n fib_list[0] = 0\n fib_list[1] = 1\n \n # Application of the recursive algorithm \n for i in range(2, n): \n fib_list[i] = fib_list[i-1] + fib_list[i-2] \n \n return fib_list" + }, + { + "comment": "Develop a machine learning algorithm in Python to identify apples from bananas.", + "code_snippet": "import numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.svm import SVC\n\n# Load data\nX = np.array([[1,2,3], [-1,2,5], [-1,2,7], [5,5,5], [3,3,3], [5,5,7]])\ny = np.array([0, 0, 0, 1, 0, 1])\n\n# Initialize classifier\nclf = SVC(kernel='linear')\n\n# Fit data\nclf.fit(X, y)\n\n# Test classifier\nx_test = np.array([[2,2,2], [4,4,4]])\n\nprediction = clf.predict(x_test)\nprint(prediction)" + }, + { + "comment": "Write a Python program to convert a list of lists into a dictionary where the first element of each list is the key and the rest of the elements are the values.", + "code_snippet": "dict1 = {}\nfor lst in [list1, list2]:\n key = lst[0]\n val = lst[1:]\n dict1[key] = val\n\nprint(dict1)\n# Output: {'apple': ['red', 'green'], 'banana': ['yellow', 'green']}" + }, + { + "comment": "Generate a fuzzy search algorithm in Python that can match strings 
with near similarities.", + "code_snippet": "def find_matches(query, choices, threshold):\n # Convert query and choices to lower case\n query, choices = query.lower(), [choice.lower() for choice in choices]\n # Initialize empty list to store matches\n matches = []\n \n # Loop through each choice\n for choice in choices:\n # Calculate Levenshtein distance between query and choice\n distance = levenshtein_distance(query, choice)\n # If distance is less than threshold, append choice to matches\n if distance <= threshold:\n matches.append(choice)\n \n return matches" + }, + { + "comment": "Write a Python program to generate a random password. The program should generate a string of 8 characters (numbers, letters and special characters)", + "code_snippet": "import random\nimport string\n\ndef generate_password():\n password_characters = string.ascii_letters + string.digits + string.punctuation\n password = ''.join(random.choice(password_characters) for i in range(8))\n print(password)\n\ngenerate_password()" + }, + { + "comment": "Is it possible to create a HTML page that displays a table of contents from the data stored in a database?", + "code_snippet": "Yes, it is possible to create a HTML page that displays a table of contents from the data stored in a database. You can use HTML, CSS, and JavaScript to create the table and a server-side language such as PHP or Python to query the database and retrieve the relevant data. Once you have the data, you can use JavaScript to render the table within the HTML page." + }, + { + "comment": "Write a Python program to compute the average salary of people over a given range of numbers from the dataset 'salary.csv'.", + "code_snippet": "import csv\n\ndef get_average_salary(filename):\n total = 0\n count = 0\n\n with open(filename, 'r') as csvfile:\n csvreader = csv.reader(csvfile)\n next(csvreader) # skip header\n\n # loop through the CSV rows\n for row in csvreader:\n total += int(row[1])\n count += 1\n \n return total/count\n\naverage_salary = get_average_salary('salary.csv')\nprint('Average Salary: ', average_salary)\n\n# Output: \n# Average Salary: 2000.0" + }, + { + "comment": "Implement a supervised learning algorithm in Python for a classification task using the iris dataset.", + "code_snippet": "# Import necessary libraries\nimport numpy as np\nimport pandas as pd\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.neighbors import KNeighborsClassifier\nfrom sklearn.metrics import confusion_matrix\n\n# Load and explore the data\ndataset = pd.read_csv('iris.csv')\nX = dataset.iloc[:, :-1].values\ny = dataset.iloc[:, -1].values\n\n# Split the dataset into training and test sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)\n\n# Preprocess the data\nscaler = StandardScaler()\nX_train = scaler.fit_transform(X_train)\nX_test = scaler.transform(X_test)\n\n# Define the model\nclf = KNeighborsClassifier(n_neighbors=5)\n\n# Fit the model to the training set\nclf.fit(X_train, y_train)\n\n# Make predictions on the test set\ny_pred = clf.predict(X_test)\n\n# Evaluate the model's performance\ncm = confusion_matrix(y_test, y_pred)\nprint(cm)" + }, + { + "comment": "Improve the following Python code to check if a number is in the range of 1 to 10 (inclusive).", + "code_snippet": "def check_number(num):\n if (num > 0 and num < 11):\n return True\n else:\n return False" + }, + { + "comment": "Create an algorithm in Python to generate the first n 
elements of the Sierpinski triangle.", "code_snippet": "def sierpinski_triangle(n): \n # Drawing lines for first row \n for i in range(2**n): \n print(' ', end=\" \") \n \n # loop for printing n rows \n for i in range(1, n+1): \n # bit pattern for first and \n # last line of each row \n for j in range(2**(i-1), 2**i): \n print('1', end=\" \") \n print() \n \n # inner rows have opposite bit pattern \n if i > 1: \n for j in range(2**(i-1)+1, 2**i-1): \n print('0', end=\" \") \n print() \n \nsierpinski_triangle(6)" + }, + { + "comment": "Write a function in Python to count the number of vowels in a string.", "code_snippet": "def count_vowels(string):\n count = 0\n vowels = set(\"aeiouAEIOU\")\n for letter in string:\n if letter in vowels:\n count += 1\n return count" + }, + { + "comment": "Implement a function f() in Python that takes an argument n and prints the multiplication table of n", "code_snippet": "def f(n):\n for i in range(1, 11):\n print(\"{} * {} = {}\".format(n, i, n*i))" + }, + { + "comment": "Write a python program to optimize the given objective function.", "code_snippet": "from scipy.optimize import minimize \n\ndef obj_func(x):\n # objective function (placeholder: sum of squares)\n return x[0]**2 + x[1]**2 + x[2]**2\n\ndef optimize(): \n # set up optimization problem\n x0 = [0.0, 0.0, 0.0] \n res = minimize(obj_func, x0, method='nelder-mead', options={'xtol': 1e-8, 'disp': True}) \n return res.x\n\nprint(optimize())" + }, + { + "comment": "Create a Python generator that returns numbers from a given list.", "code_snippet": "def list_generator(input_list):\n for item in input_list:\n yield item\n\ngen = list_generator([1,3,5,7,9])\nfor item in gen:\n print(item)" + }, + { + "comment": "Develop a Python program to create an autocomplete system that predicts words based on the input text.", "code_snippet": "def autocomplete(input_text):\n # Generate the word candidates\n word_candidates = generate_candidates(input_text)\n # Filter the word candidates \n filtered_words = filter_candidates(word_candidates)\n # Rank the words and return the top 5 \n sorted_words = rank_words(filtered_words)\n return sorted_words[:5]" + }, + { + "comment": "Write a Python code snippet to print the unique elements of a given list.", "code_snippet": "def print_unique(my_list):\n seen = set()\n for x in my_list:\n if x not in seen:\n print(x)\n seen.add(x)" + }, + { + "comment": "Create a Python program that convert a given string to a list of integers.", "code_snippet": "def to_int_list(s):\n return [int(x) for x in s.split(',')]" + }, + { + "comment": "Create a basic blockchain in Python using only the core modules", "code_snippet": "import hashlib\nimport json\nimport time\n\nclass Blockchain:\n\n def __init__(self):\n self.chain = []\n self.transactions = []\n\n def create_block(self, nonce, previous_hash):\n block = {\n 'block_number': len(self.chain) + 1,\n 'timestamp': self._current_timestamp(),\n 'nonce': nonce,\n 'previous_hash': previous_hash\n }\n self.chain.append(block)\n return block\n\n def _current_timestamp(self):\n return round(time.time() * 1000)\n\n def hash_block(self, block):\n string_object = json.dumps(block, sort_keys=True)\n block_string = string_object.encode()\n raw_hash = hashlib.sha256(block_string)\n hex_hash = raw_hash.hexdigest()\n return hex_hash" + }, + { + "comment": "Create a Python program to remove all duplicate elements from a list and preserving the original order.", "code_snippet": "# Program to remove duplicate elements\n\ndef remove_duplicates(nums):\n new_list = []\n for num in nums:\n if num not in
new_list:\n new_list.append(num)\n\n return new_list\n\n# Main program\nnums = [1, 3, 3, 5, 5, 7]\ndedup_nums = remove_duplicates(nums)\nprint(dedup_nums) # [1, 3, 5, 7]" + }, + { + "comment": "Generate a voltage signal with random noise using a Python script.", "code_snippet": "import numpy as np \n\nfreq = 250 \namplitude = 1 \n\nt = np.linspace(0, 1, 1000) \nvoltage_signal = np.sin(2*np.pi*freq*t) + np.random.normal(0, 0.1, 1000)" + }, + { + "comment": "Write a Python function that takes an array of numbers and returns an array with the greatest and smallest values.", "code_snippet": "def min_max(arr):\n min_val = min(arr)\n max_val = max(arr)\n return [min_val, max_val]\n\nresult = min_max([3, 4, 2, 8, 15, 6])\nprint(result)" + }, + { + "comment": "Construct a Python code that accepts a string as an input and returns the total number of vowels in the string", "code_snippet": "def count_vowels(string): \n vowels = \"aeiou\"\n count = 0\n for s in string:\n if s.lower() in vowels:\n count += 1\n return count" + }, + { + "comment": "Generate a Python program that prints out every number from 0 to 10, but with every number being multiplied by 2.", "code_snippet": "for i in range(11):\n print(i*2)" + }, + { + "comment": "Create a Python program that takes a string and indicates whether it is a palindrome or not.", "code_snippet": "def is_palindrome(s):\n s = s.lower()\n left = 0\n right = len(s) - 1\n \n while left < right:\n if s[left] != s[right]:\n return False\n left += 1\n right -= 1\n return True\n \nprint(is_palindrome(\"racecar\"))" + }, + { + "comment": "Generate a machine learning model in Python using scikit-learn for classifying digits in the MNIST dataset.", "code_snippet": "# Import the necessary libraries \nfrom sklearn.datasets import fetch_openml\nimport numpy as np\nfrom sklearn.linear_model import LogisticRegression\n\n#Load the dataset\nmnist = fetch_openml('mnist_784')\n\n# Split into features and labels\nX = mnist.data\ny = mnist.target\n\n# Reshape data to be valid\nX = X.reshape((X.shape[0], -1))\nprint(X.shape, y.shape)\n\n# Create a Logistic Regression classifier \nclf = LogisticRegression()\n\n# Fit the model to the data \nclf.fit(X, y)" + }, + { + "comment": "Develop a Python script to generate an RSA public/private key pair.", "code_snippet": "from Crypto.PublicKey import RSA\n\ndef generate_key_pair():\n key = RSA.generate(2048)\n pub_key = key.publickey().export_key().decode()\n with open('public.pem', 'w') as file:\n file.write(pub_key)\n \n priv_key = key.export_key().decode()\n with open('private.pem', 'w') as file:\n file.write(priv_key)\n\ngenerate_key_pair()" + }, + { + "comment": "Create a web application in Python using Flask that will take user input and generate an HTML page with the input.", "code_snippet": "from flask import Flask, render_template, request\n\napp = Flask(__name__)\n\n@app.route('/', methods=['GET', 'POST'])\ndef home():\n if request.method == 'POST':\n input_text = request.form.get('input_text')\n return render_template('result.html', input_text=input_text)\n return render_template('index.html')\n\nif __name__ == '__main__':\n app.run(debug=True)\n\n# result.html file\n<html>\n <body>\n <h1>Your Input:</h1>\n <p>{{input_text}}</p>\n </body>\n</html>
\n \n" + }, + { + "comment": "Implement a basic machine learning model in Python that performs binary classification on the given data.", + "code_snippet": "from sklearn import linear_model\nfrom sklearn.metrics import accuracy_score\n\nX = [[0.3, 0.2, 0.6], [0.3, 0.1, 0.4], [0.2, 0.3, 0.4], [0.7, 0.4, 0.2], [0.5, 0.2, 0.1]]\ny = [1, 0, 0, 1, 0]\n\nmodel = linear_model.LogisticRegression()\nmodel.fit(X, y)\n\ny_pred = model.predict(X)\naccuracy = accuracy_score(y, y_pred)\n\nprint('Model accuracy:', accuracy)" + }, + { + "comment": "Design a machine learning algorithm in Python that can identify whether a given sentence is an 'affirmative' sentence or a 'negative' sentence.", + "code_snippet": "import re \nimport nltk\nnltk.download('stopwords')\nfrom nltk.corpus import stopwords \n\n# Function to find the negation words\ndef findNegation(sentence):\n # save all the negation words\n negationwords = {'not', 'never', 'no', 'none', 'non', 'neither',\n 'nobody', 'nowhere', 'cannot', 'couldnt', 'shouldnt', \n 'wouldnt', 'dont', 'didnt', 'hasnt', 'havent', 'isnt', 'arent'} \n words = nltk.word_tokenize(sentence)\n neg = []\n for word in words:\n if word in negationwords:\n neg.append(word)\n return neg\n\n# Function to check if the sentence is affirmative or negative\ndef isAffirmativeorNegative(sentence):\n negationwords = findNegation(sentence)\n if len(negationwords)==0: \n return \"Affirmative\" \n else: \n return \"Negative\" \n\nsentence = \"He always comes late but I dont mind.\"\nresult = isAffirmativeorNegative(sentence) \nprint(result) # prints Negative" + }, + { + "comment": "Create an environment object that can execute and analyze Python code. The environment includes a set of predefined global variables and functions, as well as the ability to add additional global variables. The environment can execute Python code and analyze it for variable usage and imports. The environment also includes a stdout object that can be used to capture and display output from executed code.", + "code_snippet": "import os\nimport ast\nimport traceback\nimport time\nimport sys\nimport types\nimport builtins\nimport collections\nimport astor\nimport weakref\nfrom .jsonify import jsonify, jsonify_print, jsonify_print_expr\nfrom .datalayer import Analysis, Execution, FileEdit\nfrom .router import send\nfrom . 
import stdlib\n\ndef now():\n return int(time.time() * 1000)\n\nclass Environment:\n\n extra_globals = {}\n\n active = weakref.WeakSet()\n\n def __init__(self, path):\n self.path = path\n self.globals = {\n \"print\": jsonify_print,\n \"print_expr\": jsonify_print_expr,\n \"jsonify\": jsonify,\n \"jsonify_print\": jsonify_print,\n \"listdir\": stdlib.listdir,\n \"__builtins__\": __builtins__,\n \"FILES\": stdlib.FilesDict(self.path),\n }\n for name in stdlib.builtin_names:\n self.globals[name] = getattr(stdlib, name)\n self._cached_analysis = {}\n self.active.add(self)\n\n predefined_names = set([\"parsed\"])\n\n def init_commands(self):\n \"\"\"Returns a list of commands that represent the existing state of the\n filesystem\"\"\"\n for path in os.listdir(self.path):\n if path.endswith(\".json\"):\n continue\n if not os.path.isfile(os.path.join(self.path, path)):\n continue\n try:\n with open(os.path.join(self.path, path), \"r\") as fp:\n content = fp.read()\n yield FileEdit(filename=path, content=content, external_edit=True)\n except UnicodeDecodeError:\n pass\n\n def fixup_globals(self):\n for name, value in self.extra_globals.items():\n self.globals.setdefault(name, value)\n\n def execute(self, filename, content, subexpressions=False):\n print(\"Executing\", filename, subexpressions)\n self.fixup_globals()\n stdout = Stdout()\n compiled = None\n try:\n parsed = ast.parse(content, filename, mode='exec')\n RewriteExprToPrint(subexpressions).walk(parsed)\n var_inspect = VariableInspector()\n var_inspect.walk(parsed)\n print(\"varsed used:\", sorted(var_inspect.used), \"set:\", sorted(var_inspect.set), \"imported:\", var_inspect.imports)\n compiled = compile(parsed, filename, 'exec')\n except:\n stdout.write(traceback.format_exc())\n\n def displayhook(value):\n stdout.write_repr(value)\n\n orig_displayhook = sys.displayhook\n sys.displayhook = displayhook\n orig_stdout = sys.stdout\n orig_stderr = sys.stderr\n sys.stdout = stdout\n sys.stderr = stdout\n self.globals[\"parsed\"] = parsed\n self.globals[\"ast\"] = ast\n globals_before = self.globals.copy()\n start = time.time()\n try:\n try:\n if compiled:\n exec(compiled, self.globals)\n except:\n traceback.print_exc()\n finally:\n end = time.time()\n sys.dipslayhook = orig_displayhook\n sys.stdout = orig_stdout\n sys.stderr = orig_stderr\n local_scope = dict(\n (name, value)\n for name, value in self.globals.items()\n if name not in globals_before or globals_before[name] is not value)\n defines = dict(\n (key, {\n \"json\": jsonify(local_scope[key]),\n \"type\": str(type(local_scope[key])),\n })\n for key in local_scope\n if not isinstance(local_scope[key], types.ModuleType))\n command = Execution(\n filename=filename,\n content=content,\n emitted=stdout.emitted,\n defines=defines,\n start_time=int(start * 1000),\n end_time=int(end * 1000),\n exec_time=int((end - start) * 1000),\n with_subexpressions=subexpressions,\n )\n send(command)\n\n def analyze(self, filename, content):\n print(\"Analyzing\", filename)\n properties = {}\n try:\n parsed = ast.parse(content, filename, mode='exec')\n var_inspect = VariableInspector()\n var_inspect.walk(parsed)\n except:\n return\n properties[\"parse_error\"] = jsonify(traceback.format_exc())\n else:\n properties = var_inspect.json\n if properties != self._cached_analysis.get(filename):\n self._cached_analysis[filename] = properties\n send(Analysis(filename=filename, content=content, properties=properties))\n\n\nclass VariableInspector(astor.TreeWalk):\n\n builtin_names = dir(builtins)\n\n def 
init_variables(self):\n self.used = set()\n self.set = set()\n self.imports = set()\n self.in_target = False\n\n @property\n def json(self):\n used = set(self.used)\n for key in self.builtin_names:\n used.discard(key)\n for key in self.set:\n used.discard(key)\n for key in Environment.predefined_names:\n used.discard(key)\n return {\n \"variables_used\": list(used),\n \"variables_set\": list(self.set),\n \"imports\": list(self.imports)\n }\n\n def pre_arg(self):\n self.set.add(self.cur_node.arg)\n\n def pre_Name(self):\n if self.in_target:\n # Actually this is a set\n self.set.add(self.cur_node.id)\n else:\n self.used.add(self.cur_node.id)\n\n def pre_For(self):\n self.process_assignment(self.cur_node.target)\n\n def pre_Assign(self):\n self.process_assignment(self.cur_node.targets)\n\n def pre_withitem(self):\n self.process_assignment(self.cur_node.optional_vars)\n\n def pre_ExceptHandler(self):\n if self.cur_node.name:\n self.set.add(self.cur_node.name)\n\n def pre_alias(self):\n # Used in imports\n name = self.cur_node.asname or self.cur_node.name\n name = name.split(\".\")[0]\n self.set.add(name)\n self.imports.add(name)\n\n def pre_FunctionDef(self):\n self.set.add(self.cur_node.name)\n\n def pre_ListComp(self):\n self.process_assignment(self.cur_node.elt)\n\n def process_assignment(self, item):\n if isinstance(item, list):\n for x in item:\n self.process_assignment(x)\n return\n old_in_target = self.in_target\n self.in_target = True\n try:\n self.walk(item)\n finally:\n self.in_target = old_in_target\n\nclass RewriteExprToPrint(astor.TreeWalk):\n\n expr_node_types = \"\"\"\n UnaryOp\n BinOp\n BoolOp\n Compare\n Call\n IfExp\n Attribute\n Subscript\n ListComp SetComp GeneratorExp DictComp\n \"\"\".split()\n # Skipped:\n # UAdd USub Not Invert\n # Add Sub Mult Div FloorDiv Mod Pow LShift RShift BitOr BitXor BitAnd MatMult\n # And Or\n # Eq NotEq Lt Gt GtE Is IsNot In NotIn\n # Index Slice ExtSlice\n\n def __init__(self, subexpressions=False):\n self.subexpressions = subexpressions\n self.id_counter = 0\n astor.TreeWalk.__init__(self)\n if self.subexpressions:\n for method in self.expr_node_types:\n self.pre_handlers[method] = self.save_node_name\n self.post_handlers[method] = self.fixup_subexpressions\n del self.post_handlers['Module']\n\n def post_Name(self):\n if not self.subexpressions:\n return\n if isinstance(self.cur_node.ctx, ast.Load):\n self.replace(self.rewrite_expr(self.cur_node))\n\n def post_Module(self):\n node = self.cur_node\n node.body = [\n self.rewrite_expr(n) if isinstance(n, ast.Expr) else n\n for n in node.body]\n\n def save_node_name(self):\n self.cur_node.astor_repr = astor.to_source(self.cur_node)\n\n def fixup_subexpressions(self):\n new_node = self.rewrite_expr(self.cur_node, self.cur_node.astor_repr)\n self.replace(new_node)\n\n def rewrite_expr(self, node, expr_string=None):\n if expr_string is None:\n expr_string = astor.to_source(node)\n node_string = ast.Str(s=expr_string)\n self.id_counter += 1\n if isinstance(node, ast.Expr):\n new_node = ast.Expr(\n ast.Call(\n func=ast.Name(id='print_expr', ctx=ast.Load()),\n args=[node_string, node.value, ast.Num(n=self.id_counter)],\n keywords=[],\n starargs=None,\n )\n )\n new_node.is_print_expr = True\n else:\n new_node = ast.Call(\n func=ast.Name(id='print_expr', ctx=ast.Load()),\n args=[node_string, node, ast.Num(n=self.id_counter)],\n keywords=[],\n starargs=None,\n )\n new_node.is_print_expr = True\n ast.fix_missing_locations(new_node)\n return new_node\n\n\nclass Stdout:\n\n total_exprs_limit = 100\n 
expr_limit = 10\n\n def __init__(self):\n self.emitted = []\n self.total_exprs_printed = 0\n self.exprs_printed = collections.Counter()\n\n def write(self, content):\n self.emitted.append({\n \"type\": \"print\",\n \"time\": now(),\n \"parts\": [{\"type\": \"str\", \"str\": content}],\n })\n\n def writejson(self, json):\n assert json.get(\"type\"), \"JSON objects must have a type\"\n json.setdefault(\"time\", now())\n self.emitted.append(json)\n\n def write_repr(self, o):\n self.emitted.append(jsonify(o))\n\n def flush(self):\n pass\n\ndef add_global(name, value):\n Environment.extra_globals[name] = value\n Environment.predefined_names.add(name)\n for env in Environment.active:\n env.globals.setdefault(name, value)\n" + }, + { + "comment": "Write a Python class to modify the case of the string", + "code_snippet": "class StringCaseModifier:\n def __init__(self, string):\n self.string = string\n\n def to_upper(self):\n return self.string.upper()\n\n def to_lower(self):\n return self.string.lower()\n \n def to_title(self):\n return self.string.title()\n\nif __name__ == '__main__':\n test_string = 'Hello World'\n string_modifier = StringCaseModifier(test_string)\n print(string_modifier.to_upper())\n print(string_modifier.to_lower())\n print(string_modifier.to_title())" + }, + { + "comment": "Generate a Python program to reverse the order of words in a given string.", + "code_snippet": "def reverse_words(str):\n return \" \".join(str.split()[::-1])" + }, + { + "comment": "Write a Python program to replace a given string with another string.", + "code_snippet": "sentence = \"I love walking on the beach\"\nold_word = \"walking\"\nnew_word = \"swimming\"\n\nnew_sentence = sentence.replace(old_word, new_word)\n\nprint(\"The new sentence is: '{}'\".format(new_sentence))" + }, + { + "comment": "Write a python program to merge two lists of integers into one sorted list without using the in-built sort function.", + "code_snippet": "def merge_sort(list1, list2): \n merged_list = list1 + list2 \n for i in range(len(merged_list)): \n min_idx = i \n for j in range(i+1, len(merged_list)): \n if merged_list[min_idx] > merged_list[j]: \n min_idx = j \n merged_list[i], merged_list[min_idx] = merged_list[min_idx], merged_list[i] \n return merged_list \n \nlist1 = [3, 5, 9, 1, 8] \nlist2 = [4, 11, 7, 2]\n \nprint(merge_sort(list1, list2))" + }, + { + "comment": "Create a Python dictionary with three keys named \"Name\", \"Address\" and \"Phone\".", + "code_snippet": "info = {\n 'Name':Name,\n 'Address':Address,\n 'Phone':Phone\n}" + }, + { + "comment": "Implement a chatbot in Python that can provide customer support in a restaurant.", + "code_snippet": "import random\n\n# list of possible answers\npossible_responses = [\n 'We serve a variety of dishes - from salads to pizzas to steaks!',\n 'Check out our menu for a full list of options',\n 'We serve a variety of dishes, including traditional Italian, Mexican, and American fare. What would you like to try?',\n 'We have a great selection of meals! 
What kind of dish are you looking for?',\n 'We have a unique selection of meals - have you checked out our menu?'\n]\n\n# read the user's question\nwith open('random_user_question.txt') as user_question:\n question = user_question.read()\n\n# choose a random response\nresponse = random.choice(possible_responses)\n\n# display the response to the user\nprint(response)" + }, + { + "comment": "Design a Python program to simulate a simple ATM machine.", + "code_snippet": "# account class with the necessary functions \nclass Account:\n def __init__(self, name, balance):\n self.name = name\n self.balance = balance\n\n def deposit(self, amount):\n self.balance += amount\n\n def withdrawal(self, amount):\n self.balance -= amount\n\n# ATM class with the necessary functions\nclass ATM:\n def __init__(self):\n self.accounts = []\n\n def createAccount(self, name, balance):\n account = Account(name, balance)\n self.accounts.append(account)\n\n def deposit(self, name, amount):\n for account in self.accounts:\n if account.name == name:\n account.deposit(amount)\n\n def withdrawal(self, name, amount):\n for account in self.accounts:\n if account.name == name:\n account.withdrawal(amount)\n\n def printBalance(self, name):\n for account in self.accounts:\n if account.name == name:\n print(name, \" Balance: \", account.balance)" + }, + { + "comment": "Create a program in Python which will take in a list of strings, and print out the longest string in the list.", + "code_snippet": "def longest_string(str_list): \n longest_str = '' \n for str in str_list: \n if len(str) > len(longest_str): \n longest_str = str \n \n return longest_str \n\nstring_list = ['foo', 'test', 'longstring', 'bar'] \nlong_string = longest_string(string_list) \n\nprint(long_string)" + }, + { + "comment": "Create a program in Python that will find the smallest number in a list of numbers.", + "code_snippet": "def find_smallest_number(numbers):\n smallest = numbers[0] \n for num in numbers:\n if num < smallest:\n smallest = num\n return smallest\n\nprint(find_smallest_number([5, 4, 12, 17, 9]))" + }, + { + "comment": "Generate a Python code to compute the moving average of time-series data points.", + "code_snippet": "def moving_average(data_points, window):\n moving_average = []\n for index in range(len(data_points)):\n start = 0 if index-window+1 < 0 else index-window+1\n window_data = data_points[start:index+1]\n average = sum(window_data)/len(window_data)\n moving_average.append(average)\n \n return moving_average\n \nwindow = 3\ntime_series = [3, 5, 7, 2, 8, 10, 11, 65, 72, 81, 99, 100, 150]\nmoving_average = moving_average(time_series, window)\nprint(moving_average)" + }, + { + "comment": "Create a Python program to filter an array by a given criteria.", + "code_snippet": "# Function to filter an array by a given criteria\ndef filter_array(array, criteria):\n # Create an empty list\n filtered_array = []\n # Iterate through the array\n for value in array:\n # Check if it matches the criteria\n if criteria(value):\n # If it matches, add it to the list\n filtered_array.append(value)\n\n # Return the filtered array\n return filtered_array\n\n# Input criteria\ndef criteria(x):\n return x % 5 == 0\n\nresult = filter_array(array, criteria)\nprint(result) # Outputs [5, 10, 15, 20, 25, 30]" + }, + { + "comment": "Create a Python function to find the maximum number from a list of non-negative integers.", + "code_snippet": "def find_max(list): \n max_num = 0\n for i in list: \n if i > max_num: \n max_num = i\n \n return max_num\n\nlist = [1, 3, 
12, 5, 8, 10]\nmax_num = find_max(list)\nprint(\"Maximum number is:\", max_num)" + }, + { + "comment": "Write a Python function that takes a list of strings and returns the concatenation of all the elements with a separator/delimiter.", + "code_snippet": "def concatenate_strings(strings, separator):\n return separator.join(strings)" + }, + { + "comment": "Come up with a code in Python to find the union of two lists.", + "code_snippet": "list1 = [1, 2, 3, 4, 5]\nlist2 = [3, 4, 5, 6, 7]\n\ndef unionList(list1, list2):\n return set(list1 + list2)\n\nresult = unionList(list1, list2)\nprint(result)" + }, + { + "comment": "Construct a python dictionary containing student names and marks as key-value pairs.", + "code_snippet": "student_marks = { \n \"John\" : 90, \n \"Levi\" : 85, \n \"Jill\" : 95, \n \"Sara\" : 75\n}" + }, + { + "comment": "Write a code snippet for a simple linear regression model in Python.", + "code_snippet": "import pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn import linear_model\n\n# Read data from CSV\ndata = pd.read_csv('data.csv')\n\n# Set X and Y (X must be a 2-D array for scikit-learn)\nX = data[['X']]\nY = data['Y']\n\n# Create linear regression model\nmodel = linear_model.LinearRegression()\n\n# Train the model\nmodel.fit(X, Y)\n\n# Make predictions\nY_pred = model.predict(X)\n\n# Visualize the results\nplt.scatter(X, Y)\nplt.plot(X, Y_pred, color='red')\nplt.show()" + }, + { + "comment": "Write code in Python to implement a linked list using classes.", + "code_snippet": "class Node:\n \"\"\"Node class to represent the node in a linked list\"\"\"\n def __init__(self, data):\n self.data = data\n self.next = None\n\n\nclass LinkedList:\n \"\"\"LinkedList class to represent the entire linked list\"\"\"\n def __init__(self):\n self.head = None\n \n def print_list(self):\n node = self.head\n while(node):\n print(node.data)\n node = node.next\n\n# Creating a linked list \nlinked_list = LinkedList()\n\n# Creating the nodes\nnode1 = Node(1)\nnode2 = Node(2)\nnode3 = Node(3)\n\n# Linking the nodes\nlinked_list.head = node1\nnode1.next = node2\nnode2.next = node3\n\n# Printing the linked list\nlinked_list.print_list()" + }, + { + "comment": "Construct an XML document using Python which contains the following data.", + "code_snippet": "import xml.etree.ElementTree as ET\n\n# creating the root element\ndata = ET.Element('data')\n\nitems = ['Item1', 'Item2', 'Item3']\n\n# loop over items\nfor item in items:\n # creating sub elements\n item_element = ET.SubElement(data, 'item')\n\n # assign the value for the sub elements\n item_element.text = item\n\n# write the XML Tree\ntree = ET.ElementTree(data)\ntree.write('data.xml')" + }, + { + "comment": "Make a Python program generating numbers", + "code_snippet": "import random\n\ndef generate_numbers():\n numbers = []\n for i in range(10):\n numbers.append(random.randint(0,10))\n return numbers" + }, + { + "comment": "Convert this sql query into a python program", + "code_snippet": "import datetime, pyodbc\n\n# Establish a connection to the database\nconn = pyodbc.connect(\"\")\ncursor = conn.cursor()\n\n# Get the year of the order date\nsql = \"SELECT * FROM orders\"\nquery_date = datetime.date.today().year\nwhere_clause = \"WHERE YEAR(order_date) = ?\"\n\n# Execute the query\ncursor.execute(sql + \" \" + where_clause, query_date)\nresults = cursor.fetchall()\n\n# Output the results\nfor result in results:\n print(result)" + }, + { + "comment": "This code loads a configuration file and libraries, imports necessary modules, defines two custom colormaps, loads a data file, performs 
cubic spline interpolation on the data, applies a threshold to remove small values, and generates three plots: a histogram, an interpolated image, and a contour plot. To use this code, make sure to have the necessary files and libraries in the correct paths, and adjust the parameters as needed for your data.", + "code_snippet": "# -*- coding: utf-8 -*-\n\nimport os, sys\nprint(\"CWD: \" + os.getcwd() )\n\n# Load configuration file before pyplot\nconfig_path = os.path.abspath('../matplotlib/')\nsys.path.append(config_path)\nimport configuration as config\n\n# Library path\nlib_path = os.path.abspath('../../lib')\nsys.path.append(lib_path)\nimport framemanager_python\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport scipy.interpolate\n\n\n# Color map\n# Define \"bds_highcontrast\" color map by Britton Smith from http://yt-project.org/ \ncdict = {'red': ((0.0, 80/256., 80/256.),\n (0.2, 0.0, 0.0),\n (0.4, 0.0, 0.0),\n (0.6, 256/256., 256/256.),\n (0.95, 256/256., 256/256.),\n (1.0, 150/256., 150/256.)),\n 'green': ((0.0, 0/256., 0/256.),\n (0.2, 0/256., 0/256.),\n (0.4, 130/256., 130/256.),\n (0.6, 256/256., 256/256.),\n (1.0, 0.0, 0.0)),\n 'blue': ((0.0, 80/256., 80/256.),\n (0.2, 220/256., 220/256.),\n (0.4, 0.0, 0.0),\n (0.6, 20/256., 20/256.),\n (1.0, 0.0, 0.0))}\n\nplt.register_cmap(name='bds_highcontrast', data=cdict) \n\n# Define YELLOW_RED colormap: each row consists of (x, y0, y1) where the x must increase from 0 to 1\n#row i: x y0 y1\n# /\n# /\n#row i+1: x y0 y1\ncdict = {'red': ((0.0, 0.9, 0.9),\n (1.0, 0.9, 0.9)),\n 'green': ((0.0, 0.9, 0.9),\n (1.0, 0.0, 0.0)),\n 'blue': ((0.0, 0.0, 0.0),\n (1.0, 0.0, 0.0))}\nplt.register_cmap(name='YELLOW_RED', data=cdict) \n#cmap=plt.get_cmap('YELLOW_RED')\n#cmap=plt.get_cmap('autumn')\n#cmap=plt.get_cmap('gist_heat')\n#cmap=plt.get_cmap('Spectral_r')\n#cmap.set_under([0.0, 0.0, 0.0])\n\n\n\n# Load profile\nprofileName = os.path.abspath(\"foam_ball_short.dsa\")\nframeID = 230\n\nframeManager = framemanager_python.FrameManagerWrapper()\nframeManager.load_profile(profileName);\nnumTSFrames = frameManager.get_tsframe_count();\n\nmatrixID = 1\n\n# Load single frame\ntsframe = np.copy( frameManager.get_tsframe(frameID, matrixID) );\n\ncols = tsframe.shape[1]\nrows = tsframe.shape[0]\n\n# Add padding on border\npadding = 2\nv_padding = np.empty((padding, cols)); v_padding.fill(-50)\nh_padding = np.empty((rows+2*padding, padding)); h_padding.fill(-50)\nzs = np.vstack([v_padding, tsframe]) # Top\nzs = np.vstack([zs, v_padding]) # Bottom\nzs = np.hstack([h_padding, zs]) # Left\nzs = np.hstack([zs, h_padding]) # Right\n\n# Update matrix size with padding\ncols = zs.shape[1]\nrows = zs.shape[0]\n\n# Coordinates of sampled data points\nxs = np.arange(0, cols, 1)\nys = np.arange(0, rows, 1)\n\n# Coordinates of interpolation points\nscaleFactor = 10;\nxi = np.linspace(xs.min(), xs.max(), cols*scaleFactor)\nyi = np.linspace(ys.min(), ys.max(), rows*scaleFactor)\n\n\n\n\n\n#------------------------------------------------------\n# Interpolate with cubic splines\nspline = scipy.interpolate.RectBivariateSpline(ys, xs, zs, kx=3, ky=3, s=0)\n\n# Evaluate splines\nzi = spline(yi, xi)\n\n#------------------------------------------------------\n\n\n'''\n#------------------------------------------------------\n# Polynomial interpolation: \u2018linear\u2019, \u2018nearest\u2019, \u2018cubic\u2019\ncoordinates = [(y, x) for y in ys for x in xs]\nzs_flattened = np.ravel(zs, order='C')\ncoordinates_interpolated = [(y, x) for y in yi for x in xi]\n\n# 
Interpolate with griddata\nzi_flattened= scipy.interpolate.griddata(coordinates, zs_flattened, coordinates_interpolated, method='cubic')\n\n# Reshape flattened array to 2D\nzi = zi_flattened.reshape((rows*scaleFactor, cols*scaleFactor))\n#------------------------------------------------------\n'''\n\n\n\n\n#------------------------------------------------------\n# Old API\n# Set up a regular grid of sampled data points\n#ys, xs = np.meshgrid(xs, ys)\n\n# Set up a regular grid of interpolated points\n#yi, xi = np.meshgrid(xi, yi)\n\n# Interpolate\n#tck = scipy.interpolate.bisplrep(xs2, ys2, zs, kx=3, ky=3, s=0)\n\n# Evaluate splines\n#zi = scipy.interpolate.bisplev(xi2[:,0], yi2[0,:], tck)\n#------------------------------------------------------\n\n\n\n\n# Apply threshold to level out small values (interpolation ripples)\nmin_threshold = 25\nzi[zi < min_threshold ] = 0 \n\n\n\n#########################################\n# Plotting\n#########################################\nfig, ax = plt.subplots()\n\n\n############\n# Histogram\n############\nplt.hist(zi.flatten(), 128, range=(min_threshold, zi.max()), fc='k', ec='k')\nplt.savefig(\"histogram.pdf\", format='pdf')\nplt.close() \n\n########################\n# Interpolated image\n########################\nfig, ax = plt.subplots()\n\n# Interpolated image\n#cmap=plt.get_cmap('gray')\ncmap=plt.get_cmap('bds_highcontrast')\ncax = ax.imshow(zi, cmap=cmap, vmin=zs.min(), vmax=zs.max(), origin='lower', extent=[xs.min(), xs.max(), ys.min(), ys.max()])\n\n# Colorbar with countour levels\ncbar = fig.colorbar(cax)\ncbar.set_label('Raw sensor value', rotation=90)\ncbar.solids.set_edgecolor(\"face\") # set the color of the lines\n\nax.invert_yaxis()\nax.xaxis.tick_top()\nplt.axis('off')\n\nplt.savefig(\"interpolation.pdf\", format='pdf')\nplt.close() \n\n\n\n############\n# Contour\n############\nfig, ax = plt.subplots()\n\n# Nearest-Neighbor Image\ncax = ax.imshow(zs, interpolation='nearest', cmap=plt.get_cmap('gray'), vmin=zs.min(), vmax=zs.max(), origin='lower', extent=[xs.min(), xs.max(), ys.min(), ys.max()]) \n\n#------------------------------------------------------\n# Contour lines: contour()\n#------------------------------------------------------\ncountour_threshold = 50\nlevels = np.linspace(countour_threshold, zs.max(), 10)\n#contour = ax.contour(xi, yi, zi, levels, linewidths=1.0, colors=[(0.0, 0.0, 0.0)], origin='upper') # black contour\ncontour = ax.contour(xi, yi, zi, levels, linewidths=1.0, colors=[(1.0, 0.0, 0.0)], origin='upper') # Red contour\n#contour = ax.contour(xi, yi, zi, levels, linewidths=1.0, cmap=plt.get_cmap('bds_highcontrast'), origin='upper') # Colormap\n\n#plt.clabel(contour, inline=True, fontsize=9)\n\n# Colorbar with countour levels\ncbar = fig.colorbar(cax)\ncbar.add_lines(contour)\ncbar.set_label('Raw sensor value', rotation=90)\ncbar.solids.set_edgecolor(\"face\") # set the color of the lines\n\n'''\n#------------------------------------------------------\n# Filled contours: contourf()\n#------------------------------------------------------\n\n# Background image\nbackground = np.empty((rows, cols)); background.fill(0)\ncax = ax.imshow(background, cmap=plt.get_cmap('gray'), origin='lower', extent=[xs.min(), xs.max(), ys.min(), ys.max()] )\n\n# Filled contour\ncountour_threshold = 100 # Ignore \"ripples\" from spline extrapolation\nmax_threshold = 0 # Boost the upper limit to avoid truncation error\nlevels = np.linspace(countour_threshold, zs.max(), num=10, endpoint=True)\n\n# Levels correspond to midpoint of layers:\n# 
Extend level range to enlarge top layer (avoid ugly hole)\nlevels[-1] = levels[-1] + (levels[-1] - levels[-2])/2\n\ncontour = ax.contourf(xi, yi, zi, levels=levels, cmap=plt.get_cmap('bds_highcontrast'), origin='upper') \n\ncbar = fig.colorbar(contour, format='%.0f')\ncbar.set_label('mV', rotation=0)\ncbar.solids.set_edgecolor(\"face\") # set the color of the lines\n\n# Restore old levels\n#levels[-1] = zs.max() \n#cbar.set_ticks(levels)\n#------------------------------------------------------\n'''\n\n\nax.invert_yaxis()\nax.xaxis.tick_top()\nplt.axis('off')\n\nplt.savefig(\"contour.pdf\", format='pdf')\nplt.show() \n\n\n\n \n" + }, + { + "comment": "Write a function to generate a set of random numbers in Python.", + "code_snippet": "import random\n\ndef generate_randoms(n):\n # generate a set of n random numbers\n rands = []\n for _ in range(n):\n rands.append(random.randint(1,100))\n return rands\n\ngenerate_randoms(10)" + }, + { + "comment": "Write a python program to check if a given string contains any numbers", + "code_snippet": "def string_contains_number(string):\n pattern = re.compile(\"[0-9]\") \n return pattern.search(string) != None" + }, + { + "comment": "Design a class in Python for storing movie information including the name, year of release, genre and rating.", + "code_snippet": "class Movie:\n def __init__(self, name, release_year, genre, rating):\n self.name = name\n self.release_year = release_year\n self.genre = genre\n self.rating = rating\n\nmovie = Movie(\"Interstellar\", 2014, \"Sci-Fi\", 8.4)\nprint(movie.name, movie.release_year, movie.genre, movie.rating)" + }, + { + "comment": "Given an input string, write a Python program to count the occurrence of each character in the string.", + "code_snippet": "def char_occurrences(a_string):\n char_count = {}\n for char in a_string:\n if char in char_count:\n char_count[char] += 1\n else:\n char_count[char] = 1\n return char_count\n\nstring = \"Hello, World!\"\nprint(char_occurrences(string))\n\n# Output\n# {'H': 1, 'e': 1, 'l': 3, 'o': 2, ', ': 1, 'W': 1, 'r': 1, 'd': 1, '!': 1}" + }, + { + "comment": "Create a Python program to generate a random password. 
Specify a minimum length of 8 characters and it should contain at least one number and one special character.", + "code_snippet": "import string\nimport random\n\ndef generate_password():\n length = 8\n chars = string.ascii_letters + string.digits + string.punctuation\n # Regenerate until the password contains at least one digit and one special character\n while True:\n password = \"\".join(random.choice(chars) for i in range(length))\n if any(c.isdigit() for c in password) and any(c in string.punctuation for c in password):\n return password\n\npassword = generate_password()\nprint(password)" + }, + { + "comment": "Write a Python function to generate an even number between 10 and 200 that is divisible by 5.", + "code_snippet": "import random\n\ndef generate_even_number():\n # Draw random numbers between 10 and 200 until one is even and divisible by 5\n while True:\n num = random.randint(10, 200)\n if num % 2 == 0 and num % 5 == 0:\n return num" + }, + { + "comment": "Create a Python function to find the closest value to a given number in a given array.", + "code_snippet": "def closest_num(arr, num):\n arr.sort()\n n = len(arr)\n left = 0\n right = n - 1\n \n if arr[right] <= num:\n return arr[right]\n elif arr[left] >= num:\n return arr[left]\n while right-left > 1:\n mid = (right+left)//2\n if arr[mid] == num:\n return arr[mid]\n elif arr[mid] > num:\n right = mid\n else:\n left = mid\n # Return whichever of the two neighbouring values is closer to num\n return arr[left] if num - arr[left] <= arr[right] - num else arr[right]\n\narr = [5, 4, 8, 7]\nnum = 5\nclosest = closest_num(arr, num)\nprint(closest)" + }, + { + "comment": "Write a Python code to store and print the multiplication table of a given number.", + "code_snippet": "# Function to print the multiplication table\ndef print_multiplication_table(num):\n # Iterate over numbers from 1 to 10\n for i in range(1, 11):\n print('{0} x {1} = {2}'.format(num, i, num*i))\n\nprint_multiplication_table(5)" + }, + { + "comment": "Found and fix any bugs with the following python code.", + "code_snippet": "def sum_list(list):\n sum_list = 0\n for num in list:\n sum_list += num\n return sum_list" + }, + { + "comment": "Generate a function in Python that takes two parameters: \u2018x\u2019 and \u2018y\u2019 and prints the output as \u201cx is greater than y\u201d when x is greater than y, otherwise \u201cy is greater than or equal to x\u201d.", + "code_snippet": "def print_comparison(x, y):\n if x > y:\n print(f'{x} is greater than {y}')\n else:\n print(f'{y} is greater than or equal to {x}')" + }, + { + "comment": "Create a Python program to take an array of integers and calculate the sum of squares of all numbers provided in the array.", + "code_snippet": "def sum_squares(arr):\n total = 0\n for num in arr:\n total += num ** 2\n return total\n\narr = [2, 3, 4]\n\ntotal = sum_squares(arr)\nprint(total) # 29" + }, + { + "comment": "Design an algorithm in Python for finding out the factorial of a given number.", + "code_snippet": "def factorial(n):\n fact = 1\n for i in range(2,n+1):\n fact = fact * i\n return fact" + }, + { + "comment": "Construct a class in Python which implements two methods to add two numbers.", + "code_snippet": "class Calculator:\n def __init__(self):\n pass\n \n def add_two_numbers(self, a, b):\n return a + b\n\n def add(self, a, b, c):\n return a + b + c\n\ncalculator = Calculator()\nresult = calculator.add_two_numbers(1, 2)\nprint(result)" + }, + { + "comment": "Create a Python program that takes in a positive integer, then uses a while loop to output the numbers counting down to 1.", + "code_snippet": "num = 
int(input(\"Enter a positive integer: \"))\n\nwhile num > 0:\n print(num)\n num -= 1\n\nprint(\"Done!\")" + }, + { + "comment": "Develop an AI Tic-Tac-Toe game using Python.", + "code_snippet": "import random \n \n# printing the board \ndef drawBoard(board): \n \n print(\"---------------\") \n print(\" \"+board[1]+\" | \"+board[2]+\" | \"+board[3]+\" \") \n print(\"___|___|___\") \n print(\" \"+board[4]+\" | \"+board[5]+\" | \"+board[6]+\" \") \n print(\"___|___|___\") \n print(\" \"+board[7]+\" | \"+board[8]+\" | \"+board[9]+\" \") \n print(\" | | \") \n \n# defining the part of the game \ndef checkWin(board, player): \n return ( \n (board[1] == board[2] == board[3] == player) or\n (board[5] == board[4] == board[6] == player) or \n (board[7] == board[8] == board[9] == player) or \n (board[1] == board[5] == board[9] == player) or \n (board[3] == board[5] == board[7] == player) or \n (board[1] == board[4] == board[7] == player) or \n (board[2] == board[5] == board[8] == player) or \n (board[3] == board[6] == board[9] == player)) \n \ndef getComputerSpot(board,player): \n openSpots = [] \n indices = [i for i, spot in enumerate(board) if spot == '-'] \n \n for i in indices: \n board[i] = player \n \n if checkWin(board, player): \n board[i] = '-' \n return i \n board[i] = '-' \n \n for i in indices: \n openSpots.append(i) \n if len(openSpots): \n return random.choice(openSpots) \n else: \n return None \n \n \ndef playGame(): \n board = ['-' for x in range(10)] \n won = False\n turn = -1\n while not won: \n if turn == -1: \n # human turn \n humanSpot = int(input(\"Enter your spot: \"))\n if board[humanSpot] == '-': \n board[humanSpot] = 'X' \n else: \n print(\"the spot is already filled!\") \n continue\n turn *= -1\n \n if checkWin(board,'X'): \n drawBoard(board) \n print(\"You won!\") \n won = True\n break\n \n if turn == 1: \n # computer turn \n computerSpot = getComputerSpot(board,'O') \n if computerSpot == None: \n drawBoard(board) \n print(\"The game is drawn!\")\n break\n else: \n board[computerSpot] = 'O' \n turn *= -1\n \n drawBoard(board) \n \nplayGame()" + }, + { + "comment": "Create a Python program to calculate the sum of series 1 + \u00bd + \u2153 + ... 
+ 1/n.", + "code_snippet": "def seriesSum(n):\n # set the initial sum to 0\n total = 0\n\n # loop over the range\n for i in range(1, n+1):\n total += 1 / i\n\n return total\n\nprint(seriesSum(5))" + }, + { + "comment": "Develop a decision tree classifier in Python to predict a fourth feature from three other features.", + "code_snippet": "import pandas as pd\n\n#importing the dataset \ndataset = pd.read_csv('data.csv')\n\n#splitting the dataset into features and labels \nX = dataset.iloc[:, :-1].values \ny = dataset.iloc[:, 3].values\n\n#importing DecisionTreeClassifier \nfrom sklearn.tree import DecisionTreeClassifier\n\nclassifier = DecisionTreeClassifier(criterion = 'entropy')\n\n#fitting the classifier to the dataset\nclassifier.fit(X,y)" + }, + { + "comment": "Implement a function in Python that takes a string argument and returns the characters of the string in reverse order.", + "code_snippet": "def reverse_string(input_string):\n reversed_string = ''\n for i in range(len(input_string)-1, -1, -1):\n reversed_string += input_string[i]\n return reversed_string" + }, + { + "comment": "Once the user has provided a list of numbers and a target number, suggest a Python function to check if any two numbers from the list sum up to the target.", + "code_snippet": "def check_sum(numbers, target):\n for i in range(len(numbers)):\n for j in range(i+1, len(numbers)):\n if numbers[i] + numbers[j] == target:\n return True\n return False" + }, + { + "comment": "Write a Python Program to print out the first n prime numbers.", + "code_snippet": "def sieve_of_eratosthenes(n):\n primes = [True] * (n+1)\n primes[0] = primes[1] = False\n for i in range(2, int(n**0.5)+1):\n if primes[i]:\n for j in range(i*i, n+1, i):\n primes[j] = False\n\n prime_numbers = []\n for i in range(len(primes)):\n if primes[i]:\n prime_numbers.append(i) \n return prime_numbers" + }, + { + "comment": "Generate a Python function that takes a dictionary as an argument and returns an inverted dictionary.", + "code_snippet": "def invert_dictionary(d):\n inverted_d = {}\n\n for key, value in d.items():\n if value not in inverted_d:\n inverted_d[value] = []\n inverted_d[value].append(key)\n\n return inverted_d" + }, + { + "comment": "Write a Python code to implement Gradient Descent Algorithm.", + "code_snippet": "\"\"\"\nImplement gradient descent algorithm in Python\n\"\"\"\nimport numpy as np\n\ndef gradient_descent(x,y,learning_rate=0.01,iterations=100): \n m_curr = b_curr = 0\n n = len(x)\n for i in range(iterations):\n y_predicted = m_curr * x + b_curr\n cost = (1/n) * sum([val**2 for val in (y-y_predicted)])\n md = -(2/n)*sum(x*(y-y_predicted))\n bd = -(2/n)*sum(y-y_predicted)\n m_curr = m_curr - learning_rate * md\n b_curr = b_curr - learning_rate * bd\n print (\"m {}, b {}, cost {} iteration {}\".format(m_curr,b_curr,cost, i))\n return m_curr, b_curr" + }, + { + "comment": "Binary search is an efficient algorithm for finding an item in a sorted list of items. 
Develop an efficient algorithm using Python that implements a binary search.", + "code_snippet": "def binary_search(nums, val):\n low = 0\n high = len(nums) - 1\n \n while low <= high:\n mid = (low + high) // 2\n if nums[mid] == val:\n return mid\n elif nums[mid] > val:\n high = mid - 1\n else:\n low = mid + 1\n return -1" + }, + { + "comment": "Construct a nested loop in Python to print out the following elements in a new line.", + "code_snippet": "for x in [1,2,3]:\n for y in [4,5,6]:\n for z in [7,8,9]:\n print(x,y,z)" + }, + { + "comment": "Create a machine learning program in Python that will classify text data into 3 different categories.", + "code_snippet": "import pandas as pd\nimport numpy as np\nfrom sklearn.feature_extraction.text import CountVectorizer\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.naive_bayes import MultinomialNB\nfrom sklearn.metrics import classification_report\n\n# Load the data\ndata = pd.read_csv(\"data.csv\")\n\n# Vectorize the text\ncv = CountVectorizer()\nX = cv.fit_transform(data[\"text\"])\n\n# Convert target labels to numerical values\ny = np.array([1 if label == \"category1\" else 2 if label == \"category2\" else 3 for label in data[\"category\"]])\n\n# Split into train and test data\nX_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)\n\n# Train the model\nnb = MultinomialNB()\nnb.fit(X_train, y_train)\n\n# Test the model\ny_pred = nb.predict(X_test)\n\n# Print out the results\nprint(classification_report(y_test, y_pred))" + }, + { + "comment": "Create a Python function that takes in a list and prints a dictionary with the elements and the product of each element.", + "code_snippet": "def product_of_list(list):\n output = {}\n for element in list:\n output[element] = element * element\n return output\n\nprint(product_of_list([7, 8, 5]))" + }, + { + "comment": "Write a function in Python to print the Fibonacci series.", + "code_snippet": "def printFibonacciSeries(n): \n \n # Taking 1st two fibonacci numbers as 0 and 1 \n f1 = 0\n f2 = 1\n print(f1, f2, end = \" \") \n \n for i in range(2, n): \n next = f1 + f2 \n print(next, end = \" \") \n \n # update values \n f1 = f2 \n f2 = next" + }, + { + "comment": "Design an algorithm in Python to find the longest common substring in two strings.", + "code_snippet": "def longestCommonSubstring(string1, string2):\n n1=len(string1)\n n2=len(string2)\n \n L=[[0 for _ in range(n2+1)]for _ in range(n1+1)]\n longestSubstring=\"\"\n longestSubstringLength=0\n for i in range(1, n1+1):\n for j in range(1, n2+1):\n if string1[i-1]==string2[j-1]:\n L[i][j]=L[i-1][j-1]+1\n if L[i][j]>longestSubstringLength:\n longestSubstringLength=L[i][j]\n longestSubstring=string1[i-1-longestSubstringLength+1:i]\n else:\n L[i][j]=0\n return longestSubstring" + }, + { + "comment": "Create a Python function to shuffle a list of numbers.", + "code_snippet": "import random\n\ndef shuffle_list(nums):\n random.shuffle(nums)\n return nums\n\nshuffled_list = shuffle_list([1, 2, 3, 4, 5])\nprint(shuffled_list) #[3, 5, 2, 4, 1]" + }, + { + "comment": "What Python code can you write that takes two strings and returns whether they are anagrams?", + "code_snippet": "import collections\n\ndef is_anagram(str1, str2):\n if len(str1) != len(str2):\n return False\n\n str1_counts = collections.Counter(str1)\n str2_counts = collections.Counter(str2)\n \n for char in str1_counts:\n if str1_counts[char] != str2_counts[char]:\n return False\n return True" + }, + { + "comment": "Create a basic Python script that computes the sum of two 
variables and stores the result in a third variable.", + "code_snippet": "# define the two input variables (chosen so the example output is 13)\nx = 5\ny = 8\n\n# compute the sum of x and y\nresult = x + y\n\n# store the result in a third variable\nz = result\n\n#print the result\nprint(z) # Output: 13" + }, + { + "comment": "Generate a set of unique random numbers in the range 0-10 using Python.", + "code_snippet": "import random\nrandom_numbers = set()\nwhile len(random_numbers) < 10:\n random_numbers.add(random.randint(0, 10))\n\nprint(random_numbers)" + }, + { + "comment": "Write a function in Python to search for a given word in a large document. The function should return the line number of the document where the word is found.", + "code_snippet": "def findWord(document, word):\n lines = document.split('\\n')\n line_number = -1\n for i, line in enumerate(lines):\n if word in line:\n line_number = (i + 1)\n break\n return line_number\n\nfindWord(largeDocument, word) # 2" + }, + { + "comment": "Design a script in Python to scrape photos and captions from a given website.", + "code_snippet": "\"\"\"\nScript to scrape photos and captions from a given website\n\"\"\"\n\nfrom bs4 import BeautifulSoup\nimport requests\n\n# Get the HTML source code\nurl = 'https://example.com/webpage'\nreq = requests.get(url)\nhtml = req.text\n\n# Parse HTML source and extract photo and caption\nsoup = BeautifulSoup(html, features='lxml')\nimage_divs = soup.find_all('div', {'class': 'image-gallery__image'})\n\nfor image_div in image_divs:\n figure = image_div.find('figure')\n src = figure.find('img')['src']\n caption = figure.find('figcaption')\n print('Photo: ', src)\n if caption:\n print('Caption: ', caption.text)\n else:\n print('No caption')\n print('\\n')" + }, + { + "comment": "Create a python program that prints \"Hello World!\" five times.", + "code_snippet": "for x in range(5):\n print(\"Hello World!\")" + }, + { + "comment": "Write a Python program to find the common elements between two given lists.", + "code_snippet": "def find_common_elements(list1, list2):\n common = [x for x in list1 if x in list2]\n return common" + }, + { + "comment": "Generate a list of all numbers from 0 to 50 in Python, that are divisible by both 3 and 5.", + "code_snippet": "my_list = [x for x in range(0, 51) if x % 3 == 0 and x % 5 == 0]" + }, + { + "comment": "Create a class to create a linked list in python nodes.", + "code_snippet": "class ListNode:\n def __init__(self, data):\n self.data = data\n self.next = None" + }, + { + "comment": "Design a web server in Python using the Flask framework that would serve a json object containing the current temperature for a city based on the city name given in the request.", + "code_snippet": "import requests\nimport json\n\nfrom flask import Flask, request\n\napp = Flask(__name__)\n\n@app.route('/getTemp', methods=['POST'])\ndef get_temp():\n city = request.json['city']\n url = 'http://api.openweathermap.org/data/2.5/weather?q={}&APPID=xxxxxx'\n r = requests.get(url.format(city))\n weather = json.loads(r.text)\n return {'temperature': weather['main']['temp']}\n\nif __name__ == '__main__':\n app.run(debug=True)" + }, + { + "comment": "Improve the following Python program to reduce its time complexity.", + "code_snippet": "def find_max_sum(arr):\n max_sum = 0\n left = 0\n right = len(arr) - 1\n while left < right:\n if arr[left] + arr[right] > max_sum:\n max_sum = arr[left] + arr[right]\n if arr[left] > arr[right]:\n right -= 1\n else:\n left += 1\n return max_sum" + }, + { + "comment": "Write a Python program to format a given piece of text (input) with different options for 
font-style, size, and color.", + "code_snippet": "def format_text(text, font=\"Times New Roman\", size='14px', color='#000000'):\n # Wrap the text in an inline style so the font, size and color are applied\n return \"<span style='font-family: {}; font-size: {}; color: {}'>{}</span>\".format(font, size, color, text)\n\nprint(format_text('Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer ut lectus vel nisi consequat bibendum ac a erat. Aliquam non velit sit amet arcu placerat vulputate a ut nibh.'))" + }, + { + "comment": "Write a Python script that takes a list of integers, and returns a list of tuples that contain the index and square of each number in the original list.", + "code_snippet": "def square_index(nums):\n return [(index, num ** 2) for index, num in enumerate(nums)]\n\nsquare_index(nums)\n# Output: [(0, 1), (1, 4), (2, 9), (3, 16), (4, 25)]" + }, + { + "comment": "Using Python, create a function that takes two numbers as input, and generates a random number between them.", + "code_snippet": "import random\n\ndef random_number_generator(min, max):\n return random.randint(min, max)" + }, + { + "comment": "Generate a Python program to demonstrate a sorting algorithm.", + "code_snippet": "def sort_list(mylist):\n for i in range(len(mylist)):\n \n min_index = i\n temp = mylist[i]\n \n for j in range(i + 1, len(mylist)):\n \n if mylist[min_index] > mylist[j]:\n min_index = j\n \n mylist[i] = mylist[min_index]\n mylist[min_index] = temp\n \n return mylist\n \nmylist = [1, 5, 7, 8, 4, 10, 2]\n \nprint(\"Sorted list is:\")\nprint(sort_list(mylist))" + }, + { + "comment": "Write a code to delete a certain element from a linked list.", + "code_snippet": "# Python program to delete a node from Linked List \n \n# Node class \nclass Node: \n \n # Function to initialize the node object \n def __init__(self, data): \n self.data = data # Assign data \n self.next = None # Initialize next as null \n \n \n# Linked List class contains a Node object \nclass LinkedList: \n \n # Function to initialize head \n def __init__(self): \n self.head = None\n \n # Function to delete a node \n def deleteNode(self, node): \n \n # Store head node \n temp = self.head \n \n # If head node itself holds the key to be deleted \n if (temp is not None): \n if (temp == node): \n self.head = temp.next\n temp = None\n return\n \n # Search for the key to be deleted, keep track of the \n # previous node as we need to change 'prev.next' \n while(temp is not None): \n if temp == node: \n break\n prev = temp \n temp = temp.next \n \n # if key is not present in linked list \n if(temp == None): \n return\n \n # Unlink the node from linked list \n prev.next = temp.next\n \n temp = None" + }, + { + "comment": "Create a Python program to determine the total sum of all elements of a list.", + "code_snippet": "def calculate_sum_of_list(arr):\n total_sum = 0\n for item in arr:\n total_sum += item\n return total_sum\n\nlist1 = [1, 2, 3, 4]\nsum_of_list = calculate_sum_of_list(list1)\nprint(sum_of_list)" + }, + { + "comment": "Write a script in Python to compete in a Kaggle competition.", + "code_snippet": "# Load the necessary libraries\nimport pandas as pd\nimport numpy as np\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score\n\n# Read in the data\ntrain_data = pd.read_csv(\"data.csv\")\n\n# Split the data into X (predictors) and y (response)\nX = train_data.drop('response', axis=1)\ny = train_data['response']\n\n# Split into training and testing sets\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)\n\n# Define the model and fit it\nmodel = ...\nmodel.fit(X_train, y_train)\n\n# Make 
predictions on the test set\ny_pred = model.predict(X_test)\n\n# Score the prediction\nscore = accuracy_score(y_test, y_pred)\n\n# Submit the prediction to Kaggle\n# ..." + }, + { + "comment": "Given a list of integers and an integer k, can you write a Python program to find the consecutive integers whose sum is equal to the given integer k?", + "code_snippet": "def find_consecutive_integers(lst, k):\n # Edge cases\n if k > sum(lst) or k < 0:\n return -1\n \n # Create a window and move its boundaries\n l, h, curr_sum = 0, 0, 0\n while h < len(lst):\n # Sum of current window\n curr_sum += lst[h] \n \n # See if the window sum equals k\n while curr_sum > k and l <= h:\n curr_sum -= lst[l]\n l += 1\n \n # When window sum is equal to k, print the window\n if curr_sum == k:\n return lst[l:h+1]\n \n # Move the window one step at a time\n h += 1\n \n # If window size reaches last element and no window is \n # found\n return -1\n \nlst = [1,3,5,7,9]\nk = 15\nprint(find_consecutive_integers(lst, k)) # Output: [3,5,7]" + }, + { + "comment": "Design a basic chatbot in Python for responding to casual inquiries.", + "code_snippet": "import random\n\n#list of possible answers \nanswers = [\"That's a great question!\", \n \"I'm not sure, but I'll try to find out!\", \n \"That sounds interesting, tell me more!\", \n \"I have no idea, sorry!\"]\n\ndef chatbot_response(user_input): \n # get a random response from answers\n response = random.choice(answers)\n return response\n\n# simple loop to keep the conversation going\nwhile True:\n user_input = input('You: ')\n if user_input == 'Goodbye':\n print('Chatbot: Goodbye!')\n break\n else:\n print('Chatbot: ', chatbot_response(user_input))" + }, + { + "comment": "Write a Python program to print the top 5 most frequent words in a given sentence.", + "code_snippet": "import collections\n \nsentence = \"Python is an interpreted, high-level, general-purpose programming language.\"\n \n# Split sentence into words\nwords = sentence.split()\n \n# Set the counter for words\ncounter = collections.Counter(words)\n \n# Find top 5 most frequent words\nresult = counter.most_common(5)\nprint(result)" + }, + { + "comment": "Make a program in Python to print a staircase of height 'n', where n is an integer.", + "code_snippet": "def staircase(n): \n for i in range(0, n): \n for j in range(0, n - i - 1): \n print(end=\" \") \n for j in range(0, i + 1): \n print(\"#\", end=\"\") \n print(\"\\r\") \n \n# Driver code \nn = 5\nstaircase(n)" + }, + { + "comment": "Write python code to return the maximum number in a list.", + "code_snippet": "def find_max(nums):\n '''Returns the maximum number in the list.'''\n max_val = nums[0]\n for num in nums:\n if num > max_val:\n max_val = num\n \n return max_val" + }, + { + "comment": "Compose a python program to identify the characters in a given alphabet", + "code_snippet": "alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'\n\ndef identify_characters(alphabet):\n char_list = []\n for c in alphabet:\n char_list.append(c)\n return char_list" + }, + { + "comment": "Optimize a Flask application for efficient performance.", + "code_snippet": "from flask import Flask, request, jsonify\n\napp = Flask(__name__)\n\n@app.route('/api/<endpoint>', methods=['GET', 'POST'])\ndef process_request(endpoint):\n # Performance optimization\n # Use Redis for caching session and query performance\n # Use uWSGI for Python applications\n # Use gunicorn as WSGI server\n # Use Flask-Compress extension for compression\n # Use global variables where applicable\n # Don't use sql queries 
directly, always use model methods\n # Use query parameter in URLs instead of plain parameters\n # Use thread-local objects for performance\n\n # Your code here" + }, + { + "comment": "Design a Python program to take a list of integers and return the sum of the even numbers in the list.", + "code_snippet": "def sum_even_numbers(numbers):\n \n result = 0\n \n for number in numbers:\n if number % 2 == 0:\n result += number\n \n return result\n \nprint(sum_even_numbers([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]))" + }, + { + "comment": "Write a Python program to print the maximum number from a given list.", + "code_snippet": "def max_num(nums):\n max = nums[0]\n for num in nums[1:]:\n if num > max:\n max = num\n return max\n\nnums = [5, 4, 2, 7, 12]\nprint(max_num(nums))" + }, + { + "comment": "Write a Python program to sort a list of dictionaries by a key in each dictionary.", + "code_snippet": "def sort_list_of_dicts_by_key(my_list, key):\n return sorted(my_list, key=lambda k: k[key])" + }, + { + "comment": "Create a Python program to remove all occurrences of the given element in a given array.", + "code_snippet": "def remove_element(arr, element):\n arr = [x for x in arr if x != element]\n return arr" + }, + { + "comment": "Create a Python program to optimize a given multivariate function.", + "code_snippet": "import scipy.optimize as opt \n \ndef optimize_function(function, initial_guess): \n result = opt.minimize(function, initial_guess) \n return result.x" + }, + { + "comment": "Build a Python class to identify if a provided number is prime or composite.", + "code_snippet": "class PrimeNumber:\n def __init__(self, num):\n self.num = num\n\n def is_prime(self):\n for i in range(2, self.num):\n if(self.num % i == 0):\n return False\n return True\n\nnum = 10\nobj = PrimeNumber(num)\n\nif obj.is_prime():\n print(\"Number is Prime\")\nelse:\n print(\"Number is Composite\")" + }, + { + "comment": "Write a Python function to format a two-dimensional array as a table.", + "code_snippet": "def printTable(array):\n # find the longest string in each of the inner lists\n # so we can format the output\n col_width = [max(len(str(x)) for x in col) for col in array]\n for row in array:\n print(\" \".join(str(x).ljust(col_width[i]) for i, x in enumerate(row)))\n\narray = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\nprintTable(array)\n\n# Output: \n# 1 2 3\n# 4 5 6\n# 7 8 9" + }, + { + "comment": "Develop a program in Python to check whether a string contains only digits.", + "code_snippet": "def is_digit_only(string):\n for char in string:\n if not char.isdigit():\n return False\n return True\n\nprint(is_digit_only(\"0123\"))" + }, + { + "comment": "Implement a function to remove all the duplicates from a linked list in Python.", + "code_snippet": "class Node: \n # Function to initialise the node object \n def __init__(self, data): \n self.data = data # Assign data \n self.next = None # Initialize next as null \n \n \n# Linked List class contains a Node object \nclass LinkedList: \n # Function to initialize head \n def __init__(self): \n self.head = None\n\n def remove_duplicates(self, list_nodes):\n curr_node= self.head \n new_list=[]\n while curr_node != None: \n if curr_node.data not in new_list: \n new_list.append(curr_node.data) \n curr_node = curr_node.next\n \n # creating a linked list from the unique elements of list_nodes \n self.head=None\n for i in new_list:\n self.insert_node(i)\n \n def insert_node(self, data): \n # Create a new node \n new_node = Node(data) \n # Check if list is empty\n if self.head is None: 
\n self.head = new_node \n return\n # Otherwise, traverse to the end of list before \n # inserting the new Node \n last = self.head \n while (last.next): \n last = last.next\n last.next = new_node" + }, + { + "comment": "Generate a Python program to calculate the maximum, minimum, and average values from a given sequence of numbers.", + "code_snippet": "# Function to calculate the maximum, minimum, and average values\ndef calculate_metrics(list):\n # Calculate the maximum value\n maximum = max(list)\n \n # Calculate the minimum value\n minimum = min(list)\n \n # Calculate the average\n n = len(list)\n total = 0\n for num in list:\n total += num\n average = total/n\n \n return maximum, minimum, average\n\n# Main Program \nlist = [3, 4, 5, 6, 21, 8]\n\nmaximum, minimum, average = calculate_metrics(list)\n\nprint(\"Maximum value:\", maximum)\nprint(\"Minimum value:\", minimum)\nprint(\"Average value:\", average)" + }, + { + "comment": "Create a Deep Learning Program in Python to Predict Stock Price.", + "code_snippet": "from keras.models import Sequential\nfrom keras.layers import Dense\nfrom keras.layers import LSTM\nfrom keras.layers import Dropout\nimport numpy as np\n\n# define constants\ntime_steps = 60\nbatch_size = 32\n\n# build the model\nmodel = Sequential()\nmodel.add(LSTM(units=50, return_sequences=True, input_shape=(time_steps, 1)))\nmodel.add(Dropout(0.2))\n\nmodel.add(LSTM(units=50, return_sequences=True))\nmodel.add(Dropout(0.2))\n\nmodel.add(LSTM(units=50))\nmodel.add(Dropout(0.2))\n\nmodel.add(Dense(units=1))\n\nmodel.compile(optimizer='adam', loss='mean_squared_error')\n\n# read the data \ndataset = np.loadtxt('data.csv', delimiter=',')\n\ndata = dataset[:, 0:1]\n\n# create train and test data\nx_train = data[:int(len(data)*0.8)]\nx_test = data[int(len(data)*0.8):]\n\n# reshape the data for input format\nx_train = np.reshape(x_train, (x_train.shape[0], time_steps, 1))\nx_test = np.reshape(x_test, (x_test.shape[0], time_steps, 1))\n\n# train the model\nmodel.fit(x_train, x_train, epochs=50, batch_size=batch_size, validation_data=(x_test, x_test))" + }, + { + "comment": "Generate a Python program that performs an edit distance calculation using dynamic programming.", + "code_snippet": "def edit_distance(str1, str2):\n m = len(str1)\n n = len(str2)\n edit_table = [[0 for x in range(n+1)] for x in range(m+1)] \n \n for i in range(m+1): \n for j in range(n+1): \n if i == 0: \n edit_table[i][j] = j\n elif j == 0: \n edit_table[i][j] = i\n elif str1[i-1] == str2[j-1]: \n edit_table[i][j] = edit_table[i-1][j-1] \n else: \n edit_table[i][j] = 1 + min(edit_table[i][j-1], edit_table[i-1][j], edit_table[i-1][j-1]) \n \n return edit_table[m][n]" + }, + { + "comment": "Create an algorithm in python that calculates the sum of squares of all the numbers from 1 to N.", + "code_snippet": "def sum_of_squares(n):\n result = 0\n for i in range(n+1):\n result += i**2\n return result" + }, + { + "comment": "Generate a Python program that takes a string and capitalizes the first character of each word.", + "code_snippet": "def capitalize_string(str):\n words = str.split(' ')\n capitalized_words = [word.capitalize() for word in words]\n return \" \".join(capitalized_words)\n\ncapitalized_string = capitalize_string(\"this is an example\")\nprint(capitalized_string)" + }, + { + "comment": "Construct a python program which takes in input the size of a jagged array and prints it(no values).", + "code_snippet": "def print_jagged_array(array_size): \n # Initialize a jagged array with the given size\n 
array = [None] * array_size\n \n # Populate the array with empty arrays\n for i in range(array_size):\n array[i] = []\n \n # Print the output array\n print(array)\n\nif __name__ == \"__main__\":\n array_size = 4\n print_jagged_array(array_size)" + }, + { + "comment": "Create a simple chatbot using Python and Dialogflow, which can answer questions about the current days of the week.", + "code_snippet": "import uuid\n\nfrom flask import Flask, request\nfrom flask_restful import Resource, Api\nfrom dialogflow import detect_intent_texts\n\napp = Flask(__name__)\napi = Api(app)\n\n\n@app.route('/', methods=['GET', 'POST'])\ndef detect_intent_text():\n # create a session_id if one is not present in the request\n session_id = request.args.get('session_id') or str(uuid.uuid4())\n text = request.args.get('text', '')\n result = detect_intent_texts(session_id, text)\n response = result.fulfillment_text\n return response\n\n\nif __name__ == '__main__':\n app.run()" + }, + { + "comment": "Create a python program for constructing a decision tree algorithm for predicting the value of a stock.", + "code_snippet": "# Load libraries\nimport pandas as pd\nimport numpy as np\n\n# Load data\ndata = pd.read_csv(\"stockdata.csv\")\n\n# Split data\nX = data.iloc[:, :-1].values\ny = data.iloc[:, -1].values\n\n# Train/Test Split\nfrom sklearn.model_selection import train_test_split\nX_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)\n\n# Decision Tree Model\nfrom sklearn.tree import DecisionTreeClassifier\nclf = DecisionTreeClassifier(criterion='entropy', random_state=0)\nclf.fit(X_train, y_train)\n\n# Model Accuracy\ny_pred = clf.predict(X_test)\nprint('Model accuracy: ',clf.score(X_test, y_test))" + }, + { + "comment": "Create a Python class with the given attributes and functions.", + "code_snippet": "class Person:\n \n def __init__(self, name, age, email):\n self.name = name\n self.age = age\n self.email = email\n \n def toString(self):\n return f\"Name: {self.name}, Age: {self.age}, Email: {self.email}\"" + }, + { + "comment": "How would you create a class in Python to store and process data in a Car object?", + "code_snippet": "class Car:\n def __init__(self, make, model, color, mileage):\n self.make = make\n self.model = model\n self.color = color\n self.mileage = mileage\n\n def accelerate(self, speed):\n self.speed = speed\n\n def decelerate(self, speed):\n self.speed = speed" + }, + { + "comment": "Write a Python program to print out the Fibonacci series up to n numbers where n is a user input.", + "code_snippet": "# Python Program to print the Fibonacci series up to n numbers\n\nn = int(input(\"Enter the value of 'n': \"))\n\n# first two numbers\na = 0\nb = 1\n\nprint(\"Fibonacci Series:\") \nprint(a, b, end=\" \") \n \nfor i in range(2,n):\n \n c = a + b\n a = b\n b = c\n print(c, end = \" \")\n \nprint()" + }, + { + "comment": "Edit the following Python program to return true if the following string contains the word \"Hello\".", + "code_snippet": "def check_hello(s):\n return 'Hello' in s" + }, + { + "comment": "Given a list of integers, write a Python program to find the index of the largest number.", + "code_snippet": "nums = [5, 10, 20, 6, 8]\nmax_num = max(nums)\nmax_index = nums.index(max_num)\n\nprint(max_index)" + }, + { + "comment": "Design a python program to handle an array with the following rules:\nIf the element is divisible by 3, the element should be replaced with \"Fizz\".\nIf the element is divisible by 5, the element should be replaced with \"Buzz\".\nIf the element is divisible by 3 and 5, the element should be replaced with \"FizzBuzz\".", + "code_snippet": "def 
fizz_buzz(arr):\n new_arr = []\n for element in arr:\n if element % 3 == 0 and element % 5 == 0:\n new_arr.append(\"FizzBuzz\")\n elif element % 3 == 0:\n new_arr.append(\"Fizz\")\n elif element % 5 == 0:\n new_arr.append(\"Buzz\")\n else:\n new_arr.append(element)\n return new_arr\n\nprint(fizz_buzz([2, 3, 4, 5, 6, 15]))" + }, + { + "comment": "Write a Python function that takes two strings as arguments and returns the longest common sub-string between them.", + "code_snippet": "def longest_substring(str_1, str_2):\n len_1 = len(str_1)\n len_2 = len(str_2)\n longest_substring = ''\n for x in range(len_1):\n for y in range(len_2):\n if str_1[x] == str_2[y]:\n cur_substring = str_1[x]\n while (x + 1 < len_1 and y + 1 < len_2 and\n str_1[x + 1] == str_2[y + 1]):\n cur_substring += str_1[x + 1]\n x += 1\n y += 1\n if len(cur_substring) > len(longest_substring):\n longest_substring = cur_substring\n return longest_substring\n\nlongest_sub = longest_substring(str_1, str_2)\nprint(longest_sub)" + }, + { + "comment": "Design a program to reverse a given string.", + "code_snippet": "def reverse_string(input_string):\n reversed_string = \"\"\n for i in range(len(input_string)-1, -1, -1):\n reversed_string += input_string[i]\n return reversed_string\n\ninput_string = \"Python\"\n\nprint(reverse_string(input_string))" + }, + { + "comment": "Generate a python program to calculate the minimum and maximum values in an array", + "code_snippet": "def min_max(array):\n min = array[0]\n max = array[0]\n\n for val in array[1:]:\n if val > max:\n max = val\n if val < min:\n min = val\n return min, max" + }, + { + "comment": "Create a function in Python that takes a list of strings, converts it to camelCase, and returns a new list with the converted strings.", + "code_snippet": "def convertToCamelCase(list):\n camelCaseList = []\n\n for i, string in enumerate(list):\n # lower-case the first word, capitalize the rest for camelCase\n string = string.lower() if i == 0 else string.capitalize()\n camelCaseList.append(string)\n\n return ''.join(camelCaseList)" + }, + { + "comment": "Write a Python algorithm that finds the longest contiguous sequence of increasing numbers within an array.", + "code_snippet": "def find_longest_increasing_sequence(arr):\n longest_start = 0\n longest_length = 1\n\n current_start = 0\n current_length = 1\n\n for i in range(1, len(arr)):\n prev_val = arr[i - 1]\n curr_val = arr[i]\n\n if curr_val > prev_val:\n current_length += 1\n else:\n current_start = i\n current_length = 1\n\n if current_length > longest_length:\n longest_start = current_start\n longest_length = current_length\n\n longest_sequence = arr[longest_start:longest_start + longest_length]\n return longest_sequence\n\narr = [3, 2, 1, 4, 5, 6, 7, 8]\nlongest_seq = find_longest_increasing_sequence(arr)\nprint(longest_seq) # Output: [1, 4, 5, 6, 7, 8]" + }, + { + "comment": "Generate a Python function to parse a JSON data structure.", + "code_snippet": "import json\n\ndef parse_json(data):\n data = json.loads(data)\n return data['name'], data['age'], data['occupation']\n\n# sample input matching the expected output\ndata = '{\"name\": \"John\", \"age\": 18, \"occupation\": \"Software Engineer\"}'\n\nname, age, occupation = parse_json(data)\nprint(name, age, occupation)\n# Output: John 18 Software Engineer" + }, + { + "comment": "Suggest an improvement for the following python code for finding the sum of elements of a list", + "code_snippet": "def sum_elements(arr):\n return sum(arr) # return sum of elements of arr with one line of code." 
+ }, + { + "comment": "Generate a Python class to represent a 3-dimensional vector.", + "code_snippet": "class Vector3D:\n def __init__(self, x, y, z):\n self.x = x\n self.y = y\n self.z = z\n \n def length(self):\n return (self.x**2 + self.y**2 + self.z**2)**0.5" + }, + { + "comment": "Work out a solution to the Towers of Hanoi game using Python.", + "code_snippet": "def TowersOfHanoi(n, fromRod, toRod, auxRod): \n if n == 1: \n print(\"Move disk 1 from rod\",fromRod,\"to rod\",toRod) \n return\n TowersOfHanoi(n-1, fromRod, auxRod, toRod) \n print(\"Move disk\", n, \"from rod\", fromRod, \"to rod\", toRod) \n TowersOfHanoi(n-1, auxRod, toRod, fromRod) \n\nn = 4\nTowersOfHanoi(n, 'A', 'C', 'B')" + } +] \ No newline at end of file diff --git a/examples/data/issue_comment.json b/examples/data/issue_comment.json new file mode 100644 index 00000000..85e9335b --- /dev/null +++ b/examples/data/issue_comment.json @@ -0,0 +1,8264 @@ +[ + { + "title": "How to set_epoch with interleave_datasets?", + "html_url": "https://github.com/huggingface/datasets/issues/7051", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7051/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add checkpoint and resume title in docs", + "html_url": "https://github.com/huggingface/datasets/pull/7050", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7050/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7050.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7050', 'merged_at': '2024-07-15T15:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7050.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7050'}", + "is_pull_request": true + }, + { + "title": "Save nparray as list", + "html_url": "https://github.com/huggingface/datasets/issues/7049", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7049/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ImportError: numpy.core.multiarray when using `filter`", + "html_url": "https://github.com/huggingface/datasets/issues/7048", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7048/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Save Dataset as Sharded Parquet ", + "html_url": "https://github.com/huggingface/datasets/issues/7047", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7047/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support librosa and numpy 2.0 for Python 3.10", + "html_url": "https://github.com/huggingface/datasets/pull/7046", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7046/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7046.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7046', 'merged_at': '2024-07-12T12:58:17Z', 'patch_url': 
'https://github.com/huggingface/datasets/pull/7046.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7046'}", + "is_pull_request": true + }, + { + "title": "Fix tensorflow min version depending on Python version", + "html_url": "https://github.com/huggingface/datasets/pull/7045", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7045/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7045.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7045', 'merged_at': '2024-07-12T12:33:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7045.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7045'}", + "is_pull_request": true + }, + { + "title": "Mark tests that require librosa", + "html_url": "https://github.com/huggingface/datasets/pull/7044", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7044/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7044.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7044', 'merged_at': '2024-07-12T09:00:09Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7044.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7044'}", + "is_pull_request": true + }, + { + "title": "Add decorator as explicit test dependency", + "html_url": "https://github.com/huggingface/datasets/pull/7043", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7043/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7043.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7043', 'merged_at': '2024-07-12T08:07:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7043.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7043'}", + "is_pull_request": true + }, + { + "title": "Improved the tutorial by adding a link for loading datasets", + "html_url": "https://github.com/huggingface/datasets/pull/7042", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7042/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7042.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7042', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7042.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7042'}", + "is_pull_request": true + }, + { + "title": "`sort` after `filter` unreasonably slow", + "html_url": "https://github.com/huggingface/datasets/issues/7041", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7041/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load `streaming=True` dataset with downloaded cache", + "html_url": "https://github.com/huggingface/datasets/issues/7040", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7040/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix export to JSON when dataset larger than batch size", + "html_url": "https://github.com/huggingface/datasets/pull/7039", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7039/comments", + "labels": 
"[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7039.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7039', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7039.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7039'}", + "is_pull_request": true + }, + { + "title": "Yes, can definitely elaborate:", + "html_url": "https://github.com/huggingface/datasets/issues/7038", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7038/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "A bug of Dataset.to_json() function", + "html_url": "https://github.com/huggingface/datasets/issues/7037", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7037/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix doc generation when NamedSplit is used as parameter default value", + "html_url": "https://github.com/huggingface/datasets/pull/7036", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7036/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7036.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7036', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7036.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7036'}", + "is_pull_request": true + }, + { + "title": "Docs are not generated when a parameter defaults to a NamedSplit value", + "html_url": "https://github.com/huggingface/datasets/issues/7035", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7035/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "chore: fix typos in docs", + "html_url": "https://github.com/huggingface/datasets/pull/7034", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7034/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7034.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7034', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7034.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7034'}", + "is_pull_request": true + }, + { + "title": "`from_generator` does not allow to specify the split name", + "html_url": "https://github.com/huggingface/datasets/issues/7033", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7033/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Register `.zstd` extension for zstd-compressed files", + "html_url": "https://github.com/huggingface/datasets/pull/7032", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/7032/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7032.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7032', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7032.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7032'}", + "is_pull_request": true + }, + { + "title": "CI quality is broken: use ruff check instead", + "html_url": "https://github.com/huggingface/datasets/issues/7031", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7031/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add option to disable progress bar when reading a dataset (\"Loading dataset from disk\")", + "html_url": "https://github.com/huggingface/datasets/issues/7030", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7030/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset on AWS lambda throws OSError(30, 'Read-only file system') error", + "html_url": "https://github.com/huggingface/datasets/issues/7029", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7029/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix ci", + "html_url": "https://github.com/huggingface/datasets/pull/7028", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7028/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7028.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7028', 'merged_at': '2024-07-04T15:19:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7028.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7028'}", + "is_pull_request": true + }, + { + "title": "Missing line from previous pr", + "html_url": "https://github.com/huggingface/datasets/pull/7027", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7027/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7027.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7027', 'merged_at': '2024-07-04T14:34:36Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7027.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7027'}", + "is_pull_request": true + }, + { + "title": "Fix check_library_imports", + "html_url": "https://github.com/huggingface/datasets/pull/7026", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7026/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7026.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7026', 'merged_at': '2024-07-04T14:20:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7026.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7026'}", + "is_pull_request": true + }, + { + 
"title": "feat: support non streamable arrow file binary format", + "html_url": "https://github.com/huggingface/datasets/pull/7025", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7025/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7025.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7025', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7025.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7025'}", + "is_pull_request": true + }, + { + "title": "Streaming dataset not returning data", + "html_url": "https://github.com/huggingface/datasets/issues/7024", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7024/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove dead code for pyarrow < 15.0.0", + "html_url": "https://github.com/huggingface/datasets/pull/7023", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7023/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7023.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7023', 'merged_at': '2024-07-03T09:17:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7023.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7023'}", + "is_pull_request": true + }, + { + "title": "There is dead code after we require pyarrow >= 15.0.0", + "html_url": "https://github.com/huggingface/datasets/issues/7022", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7022/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix casting list array to fixed size list", + "html_url": "https://github.com/huggingface/datasets/pull/7021", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7021/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7021.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7021', 'merged_at': '2024-07-03T08:41:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7021.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7021'}", + "is_pull_request": true + }, + { + "title": "Casting list array to fixed size list raises error", + "html_url": "https://github.com/huggingface/datasets/issues/7020", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7020/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow Polars round trip by supporting pyarrow large list", + "html_url": "https://github.com/huggingface/datasets/pull/7019", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7019/comments", + "labels": "[]", + "state": "open", + 
"pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7019.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7019', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7019.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7019'}", + "is_pull_request": true + }, + { + "title": "`load_dataset` fails to load dataset saved by `save_to_disk`", + "html_url": "https://github.com/huggingface/datasets/issues/7018", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7018/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support fsspec 2024.6.1", + "html_url": "https://github.com/huggingface/datasets/pull/7017", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7017/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7017.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7017', 'merged_at': '2024-07-01T12:06:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7017.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7017'}", + "is_pull_request": true + }, + { + "title": "`drop_duplicates` method", + "html_url": "https://github.com/huggingface/datasets/issues/7016", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7016/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add split argument to Generator", + "html_url": "https://github.com/huggingface/datasets/pull/7015", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7015/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7015.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7015', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7015.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7015'}", + "is_pull_request": true + }, + { + "title": "Skip faiss tests on Windows to avoid running CI for 360 minutes", + "html_url": "https://github.com/huggingface/datasets/pull/7014", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7014/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7014.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7014', 'merged_at': '2024-07-01T07:10:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7014.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7014'}", + "is_pull_request": true + }, + { + "title": "CI is broken for faiss tests on Windows: node down: Not properly terminated", + "html_url": "https://github.com/huggingface/datasets/issues/7013", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7013/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 
'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Raise an error when a nested object is expected to be a mapping that displays the object", + "html_url": "https://github.com/huggingface/datasets/pull/7012", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7012/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7012.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7012', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7012.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7012'}", + "is_pull_request": true + }, + { + "title": "Re-enable raising error from huggingface-hub FutureWarning in CI", + "html_url": "https://github.com/huggingface/datasets/pull/7011", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7011/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7011.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7011', 'merged_at': '2024-06-28T12:19:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7011.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7011'}", + "is_pull_request": true + }, + { + "title": "Re-enable raising error from huggingface-hub FutureWarning in CI", + "html_url": "https://github.com/huggingface/datasets/issues/7010", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7010/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support ruff 0.5.0 in CI", + "html_url": "https://github.com/huggingface/datasets/pull/7009", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7009/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7009.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7009', 'merged_at': '2024-06-28T07:11:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7009.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7009'}", + "is_pull_request": true + }, + { + "title": "Support ruff 0.5.0 in CI", + "html_url": "https://github.com/huggingface/datasets/issues/7008", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7008/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix CI by temporarily pinning ruff < 0.5.0", + "html_url": "https://github.com/huggingface/datasets/pull/7007", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7007/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7007.diff', 'html_url': 
'https://github.com/huggingface/datasets/pull/7007', 'merged_at': '2024-06-28T05:25:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7007.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7007'}", + "is_pull_request": true + }, + { + "title": "CI is broken after ruff-0.5.0: E721", + "html_url": "https://github.com/huggingface/datasets/issues/7006", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7006/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "EmptyDatasetError: The directory at /metadata.jsonl doesn't contain any data files", + "html_url": "https://github.com/huggingface/datasets/issues/7005", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7005/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix WebDatasets KeyError for user-defined Features when a field is missing in an example", + "html_url": "https://github.com/huggingface/datasets/pull/7004", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7004/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7004.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7004', 'merged_at': '2024-06-28T09:30:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7004.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7004'}", + "is_pull_request": true + }, + { + "title": "minor fix for bfloat16", + "html_url": "https://github.com/huggingface/datasets/pull/7003", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7003/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7003.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7003', 'merged_at': '2024-06-25T16:10:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7003.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7003'}", + "is_pull_request": true + }, + { + "title": "Fix dump of bfloat16 torch tensor", + "html_url": "https://github.com/huggingface/datasets/pull/7002", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7002/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7002.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7002', 'merged_at': '2024-06-25T15:51:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7002.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7002'}", + "is_pull_request": true + }, + { + "title": "Datasetbuilder Local Download FileNotFoundError", + "html_url": "https://github.com/huggingface/datasets/issues/7001", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7001/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "IterableDataset: Unsupported ScalarType BFloat16", + "html_url": "https://github.com/huggingface/datasets/issues/7000", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/7000/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove tasks", + "html_url": "https://github.com/huggingface/datasets/pull/6999", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6999/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6999.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6999', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6999.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6999'}", + "is_pull_request": true + }, + { + "title": "Fix tests using hf-internal-testing/librispeech_asr_dummy", + "html_url": "https://github.com/huggingface/datasets/pull/6998", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6998/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6998.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6998', 'merged_at': '2024-06-25T08:13:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6998.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6998'}", + "is_pull_request": true + }, + { + "title": "CI is broken for tests using hf-internal-testing/librispeech_asr_dummy", + "html_url": "https://github.com/huggingface/datasets/issues/6997", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6997/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove deprecated code", + "html_url": "https://github.com/huggingface/datasets/pull/6996", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6996/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6996.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6996', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6996.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6996'}", + "is_pull_request": true + }, + { + "title": "ImportError when importing datasets.load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6995", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6995/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix incorrect rank value in data splitting", + "html_url": "https://github.com/huggingface/datasets/pull/6994", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6994/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6994.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6994', 'merged_at': '2024-06-25T16:19:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6994.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6994'}", + "is_pull_request": true + }, + { + "title": "less script docs", + "html_url": 
"https://github.com/huggingface/datasets/pull/6993", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6993/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6993.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6993', 'merged_at': '2024-06-27T09:31:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6993.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6993'}", + "is_pull_request": true + }, + { + "title": "Dataset with streaming doesn't work with proxy", + "html_url": "https://github.com/huggingface/datasets/issues/6992", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6992/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unblock NumPy 2.0", + "html_url": "https://github.com/huggingface/datasets/pull/6991", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6991/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6991.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6991', 'merged_at': '2024-07-12T12:04:53Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6991.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6991'}", + "is_pull_request": true + }, + { + "title": "Problematic rank after calling `split_dataset_by_node` twice", + "html_url": "https://github.com/huggingface/datasets/issues/6990", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6990/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "cache in nfs error", + "html_url": "https://github.com/huggingface/datasets/issues/6989", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6989/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[`feat`] Move dataset card creation to method for easier overriding", + "html_url": "https://github.com/huggingface/datasets/pull/6988", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6988/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6988.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6988', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6988.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6988'}", + "is_pull_request": true + }, + { + "title": "Remove beam", + "html_url": "https://github.com/huggingface/datasets/pull/6987", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6987/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6987.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6987', 'merged_at': '2024-06-26T19:35:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6987.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6987'}", + "is_pull_request": true + }, + { + "title": "Add large_list type support in string_to_arrow", + "html_url": "https://github.com/huggingface/datasets/pull/6986", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6986/comments", 
+ "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6986.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6986', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6986.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6986'}", + "is_pull_request": true + }, + { + "title": "AttributeError: module 'pyarrow.lib' has no attribute 'ListViewType'", + "html_url": "https://github.com/huggingface/datasets/issues/6985", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6985/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Convert polars DataFrame back to datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6984", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6984/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove metrics", + "html_url": "https://github.com/huggingface/datasets/pull/6983", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6983/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6983.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6983', 'merged_at': '2024-06-28T06:51:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6983.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6983'}", + "is_pull_request": true + }, + { + "title": "cannot split dataset when using load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6982", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6982/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Update docs on trust_remote_code defaults to False", + "html_url": "https://github.com/huggingface/datasets/pull/6981", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6981/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6981.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6981', 'merged_at': '2024-06-19T14:26:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6981.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6981'}", + "is_pull_request": true + }, + { + "title": "Support NumPy 2.0", + "html_url": "https://github.com/huggingface/datasets/issues/6980", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6980/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "How can I load partial parquet files only?", + "html_url": "https://github.com/huggingface/datasets/issues/6979", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6979/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix regression for pandas < 2.0.0 in JSON loader", + "html_url": "https://github.com/huggingface/datasets/pull/6978", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6978/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6978.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6978', 'merged_at': '2024-06-19T05:50:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6978.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6978'}", + "is_pull_request": true + }, + { + "title": "load json file error with v2.20.0", + "html_url": "https://github.com/huggingface/datasets/issues/6977", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6977/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Ensure compatibility with numpy 2.0.0", + "html_url": "https://github.com/huggingface/datasets/pull/6976", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6976/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6976.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6976', 'merged_at': '2024-06-19T14:04:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6976.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6976'}", + "is_pull_request": true + }, + { + "title": "Set temporary numpy upper version < 2.0.0 to fix CI", + "html_url": "https://github.com/huggingface/datasets/pull/6975", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6975/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6975.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6975', 'merged_at': '2024-06-17T12:43:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6975.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6975'}", + "is_pull_request": true + }, + { + "title": "IndexError during training with Squad dataset and T5-small model", + "html_url": "https://github.com/huggingface/datasets/issues/6973", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6973/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix webdataset pickling", + "html_url": "https://github.com/huggingface/datasets/pull/6972", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6972/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6972.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6972', 'merged_at': '2024-06-14T15:37:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6972.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6972'}", + "is_pull_request": true + }, + { + "title": "packaging: Remove useless dependencies", + "html_url": "https://github.com/huggingface/datasets/pull/6971", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6971/comments", + "labels": "[]", + 
"state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6971.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6971', 'merged_at': '2024-06-14T13:57:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6971.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6971'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6970", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6970/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6970.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6970', 'merged_at': '2024-06-13T14:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6970.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6970'}", + "is_pull_request": true + }, + { + "title": "Release: 2.20.0", + "html_url": "https://github.com/huggingface/datasets/pull/6969", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6969/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6969.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6969', 'merged_at': '2024-06-13T14:55:53Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6969.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6969'}", + "is_pull_request": true + }, + { + "title": "Use `HF_HUB_OFFLINE` instead of `HF_DATASETS_OFFLINE`", + "html_url": "https://github.com/huggingface/datasets/pull/6968", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6968/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6968.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6968', 'merged_at': '2024-06-13T17:25:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6968.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6968'}", + "is_pull_request": true + }, + { + "title": "Method to load Laion400m", + "html_url": "https://github.com/huggingface/datasets/issues/6967", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6967/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove underlines between badges", + "html_url": "https://github.com/huggingface/datasets/pull/6966", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6966/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6966.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6966', 'merged_at': '2024-06-19T14:10:11Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6966.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6966'}", + "is_pull_request": true + }, + { + "title": "Improve skip take shuffling and distributed", + "html_url": "https://github.com/huggingface/datasets/pull/6965", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6965/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6965.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6965', 'merged_at': '2024-06-24T15:16:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6965.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6965'}", + "is_pull_request": true + }, + { + "title": "Fix resuming arrow format", + "html_url": "https://github.com/huggingface/datasets/pull/6964", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6964/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6964.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6964', 'merged_at': '2024-06-14T14:58:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6964.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6964'}", + "is_pull_request": true + }, + { + "title": "[Streaming] retry on requests errors", + "html_url": "https://github.com/huggingface/datasets/pull/6963", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6963/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6963.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6963', 'merged_at': '2024-06-28T09:46:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6963.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6963'}", + "is_pull_request": true + }, + { + "title": "fix(ci): remove unnecessary permissions", + "html_url": "https://github.com/huggingface/datasets/pull/6962", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6962/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6962.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6962', 'merged_at': '2024-06-11T08:25:47Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6962.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6962'}", + "is_pull_request": true + }, + { + "title": "Manual downloads should count as downloads", + "html_url": "https://github.com/huggingface/datasets/issues/6961", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6961/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "feat(ci): add trufflehog secrets detection", + "html_url": "https://github.com/huggingface/datasets/pull/6960", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6960/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6960.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6960', 'merged_at': '2024-06-08T14:52:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6960.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6960'}", + "is_pull_request": true + }, + { + "title": "Better error handling 
in `dataset_module_factory`", + "html_url": "https://github.com/huggingface/datasets/pull/6959", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6959/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6959.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6959', 'merged_at': '2024-06-10T07:27:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6959.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6959'}", + "is_pull_request": true + }, + { + "title": "My Private Dataset doesn't exist on the Hub or cannot be accessed", + "html_url": "https://github.com/huggingface/datasets/issues/6958", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6958/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix typos in docs", + "html_url": "https://github.com/huggingface/datasets/pull/6957", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6957/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6957.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6957', 'merged_at': '2024-06-05T12:43:26Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6957.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6957'}", + "is_pull_request": true + }, + { + "title": "update docs on N-dim arrays", + "html_url": "https://github.com/huggingface/datasets/pull/6956", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6956/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6956.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6956', 'merged_at': '2024-06-04T16:40:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6956.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6956'}", + "is_pull_request": true + }, + { + "title": "Fix small typo", + "html_url": "https://github.com/huggingface/datasets/pull/6955", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6955/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6955.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6955', 'merged_at': '2024-06-04T15:20:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6955.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6955'}", + "is_pull_request": true + }, + { + "title": "Remove default `trust_remote_code=True`", + "html_url": "https://github.com/huggingface/datasets/pull/6954", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6954/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6954.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6954', 'merged_at': '2024-06-07T12:20:29Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6954.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6954'}", + "is_pull_request": true + }, + { + "title": "Remove canonical datasets from docs", + "html_url": "https://github.com/huggingface/datasets/issues/6953", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6953/comments", + "labels": "[{'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Move info_utils errors to exceptions module", + "html_url": "https://github.com/huggingface/datasets/pull/6952", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6952/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6952.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6952', 'merged_at': '2024-06-10T14:03:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6952.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6952'}", + "is_pull_request": true + }, + { + "title": "load_dataset() should load all subsets, if no specific subset is specified", + "html_url": "https://github.com/huggingface/datasets/issues/6951", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6951/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "How to set_epoch with interleave_datasets?", + "html_url": "https://github.com/huggingface/datasets/issues/7051", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7051/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add checkpoint and resume title in docs", + "html_url": "https://github.com/huggingface/datasets/pull/7050", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7050/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7050.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7050', 'merged_at': '2024-07-15T15:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7050.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7050'}", + "is_pull_request": true + }, + { + "title": "Save nparray as list", + "html_url": "https://github.com/huggingface/datasets/issues/7049", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7049/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ImportError: numpy.core.multiarray when using `filter`", + "html_url": "https://github.com/huggingface/datasets/issues/7048", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7048/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Save Dataset as Sharded Parquet ", + "html_url": "https://github.com/huggingface/datasets/issues/7047", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7047/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 
'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support librosa and numpy 2.0 for Python 3.10", + "html_url": "https://github.com/huggingface/datasets/pull/7046", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7046/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7046.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7046', 'merged_at': '2024-07-12T12:58:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7046.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7046'}", + "is_pull_request": true + }, + { + "title": "Fix tensorflow min version depending on Python version", + "html_url": "https://github.com/huggingface/datasets/pull/7045", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7045/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7045.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7045', 'merged_at': '2024-07-12T12:33:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7045.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7045'}", + "is_pull_request": true + }, + { + "title": "Mark tests that require librosa", + "html_url": "https://github.com/huggingface/datasets/pull/7044", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7044/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7044.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7044', 'merged_at': '2024-07-12T09:00:09Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7044.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7044'}", + "is_pull_request": true + }, + { + "title": "Add decorator as explicit test dependency", + "html_url": "https://github.com/huggingface/datasets/pull/7043", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7043/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7043.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7043', 'merged_at': '2024-07-12T08:07:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7043.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7043'}", + "is_pull_request": true + }, + { + "title": "Improved the tutorial by adding a link for loading datasets", + "html_url": "https://github.com/huggingface/datasets/pull/7042", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7042/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7042.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7042', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7042.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7042'}", + "is_pull_request": true + }, + { + "title": "`sort` after `filter` unreasonably slow", + "html_url": "https://github.com/huggingface/datasets/issues/7041", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7041/comments", + 
"labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load `streaming=True` dataset with downloaded cache", + "html_url": "https://github.com/huggingface/datasets/issues/7040", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7040/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix export to JSON when dataset larger than batch size", + "html_url": "https://github.com/huggingface/datasets/pull/7039", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7039/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7039.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7039', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7039.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7039'}", + "is_pull_request": true + }, + { + "title": "Yes, can definitely elaborate:", + "html_url": "https://github.com/huggingface/datasets/issues/7038", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7038/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "A bug of Dataset.to_json() function", + "html_url": "https://github.com/huggingface/datasets/issues/7037", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7037/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix doc generation when NamedSplit is used as parameter default value", + "html_url": "https://github.com/huggingface/datasets/pull/7036", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7036/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7036.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7036', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7036.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7036'}", + "is_pull_request": true + }, + { + "title": "Docs are not generated when a parameter defaults to a NamedSplit value", + "html_url": "https://github.com/huggingface/datasets/issues/7035", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7035/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "chore: fix typos in docs", + "html_url": "https://github.com/huggingface/datasets/pull/7034", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7034/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7034.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7034', 'merged_at': None, 'patch_url': 
'https://github.com/huggingface/datasets/pull/7034.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7034'}", + "is_pull_request": true + }, + { + "title": "`from_generator` does not allow to specify the split name", + "html_url": "https://github.com/huggingface/datasets/issues/7033", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7033/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Register `.zstd` extension for zstd-compressed files", + "html_url": "https://github.com/huggingface/datasets/pull/7032", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7032/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7032.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7032', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7032.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7032'}", + "is_pull_request": true + }, + { + "title": "CI quality is broken: use ruff check instead", + "html_url": "https://github.com/huggingface/datasets/issues/7031", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7031/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add option to disable progress bar when reading a dataset (\"Loading dataset from disk\")", + "html_url": "https://github.com/huggingface/datasets/issues/7030", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7030/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset on AWS lambda throws OSError(30, 'Read-only file system') error", + "html_url": "https://github.com/huggingface/datasets/issues/7029", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7029/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix ci", + "html_url": "https://github.com/huggingface/datasets/pull/7028", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7028/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7028.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7028', 'merged_at': '2024-07-04T15:19:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7028.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7028'}", + "is_pull_request": true + }, + { + "title": "Missing line from previous pr", + "html_url": "https://github.com/huggingface/datasets/pull/7027", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7027/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7027.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7027', 'merged_at': '2024-07-04T14:34:36Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7027.patch', 'url': 
'https://api.github.com/repos/huggingface/datasets/pulls/7027'}", + "is_pull_request": true + }, + { + "title": "Fix check_library_imports", + "html_url": "https://github.com/huggingface/datasets/pull/7026", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7026/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7026.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7026', 'merged_at': '2024-07-04T14:20:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7026.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7026'}", + "is_pull_request": true + }, + { + "title": "feat: support non streamable arrow file binary format", + "html_url": "https://github.com/huggingface/datasets/pull/7025", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7025/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7025.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7025', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7025.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7025'}", + "is_pull_request": true + }, + { + "title": "Streaming dataset not returning data", + "html_url": "https://github.com/huggingface/datasets/issues/7024", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7024/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove dead code for pyarrow < 15.0.0", + "html_url": "https://github.com/huggingface/datasets/pull/7023", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7023/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7023.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7023', 'merged_at': '2024-07-03T09:17:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7023.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7023'}", + "is_pull_request": true + }, + { + "title": "There is dead code after we require pyarrow >= 15.0.0", + "html_url": "https://github.com/huggingface/datasets/issues/7022", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7022/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix casting list array to fixed size list", + "html_url": "https://github.com/huggingface/datasets/pull/7021", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7021/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7021.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7021', 'merged_at': '2024-07-03T08:41:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7021.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7021'}", + "is_pull_request": true + }, + { + "title": "Casting list array to fixed size list raises error", + "html_url": 
"https://github.com/huggingface/datasets/issues/7020", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7020/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow Polars round trip by supporting pyarrow large list", + "html_url": "https://github.com/huggingface/datasets/pull/7019", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7019/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7019.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7019', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7019.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7019'}", + "is_pull_request": true + }, + { + "title": "`load_dataset` fails to load dataset saved by `save_to_disk`", + "html_url": "https://github.com/huggingface/datasets/issues/7018", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7018/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support fsspec 2024.6.1", + "html_url": "https://github.com/huggingface/datasets/pull/7017", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7017/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7017.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7017', 'merged_at': '2024-07-01T12:06:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7017.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7017'}", + "is_pull_request": true + }, + { + "title": "`drop_duplicates` method", + "html_url": "https://github.com/huggingface/datasets/issues/7016", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7016/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add split argument to Generator", + "html_url": "https://github.com/huggingface/datasets/pull/7015", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7015/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7015.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7015', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7015.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7015'}", + "is_pull_request": true + }, + { + "title": "Skip faiss tests on Windows to avoid running CI for 360 minutes", + "html_url": "https://github.com/huggingface/datasets/pull/7014", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7014/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7014.diff', 'html_url': 
'https://github.com/huggingface/datasets/pull/7014', 'merged_at': '2024-07-01T07:10:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7014.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7014'}", + "is_pull_request": true + }, + { + "title": "CI is broken for faiss tests on Windows: node down: Not properly terminated", + "html_url": "https://github.com/huggingface/datasets/issues/7013", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7013/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Raise an error when a nested object is expected to be a mapping that displays the object", + "html_url": "https://github.com/huggingface/datasets/pull/7012", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7012/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7012.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7012', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/7012.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7012'}", + "is_pull_request": true + }, + { + "title": "Re-enable raising error from huggingface-hub FutureWarning in CI", + "html_url": "https://github.com/huggingface/datasets/pull/7011", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7011/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7011.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7011', 'merged_at': '2024-06-28T12:19:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7011.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7011'}", + "is_pull_request": true + }, + { + "title": "Re-enable raising error from huggingface-hub FutureWarning in CI", + "html_url": "https://github.com/huggingface/datasets/issues/7010", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7010/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support ruff 0.5.0 in CI", + "html_url": "https://github.com/huggingface/datasets/pull/7009", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7009/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7009.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7009', 'merged_at': '2024-06-28T07:11:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7009.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7009'}", + "is_pull_request": true + }, + { + "title": "Support ruff 0.5.0 in CI", + "html_url": "https://github.com/huggingface/datasets/issues/7008", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7008/comments", + "labels": 
"[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix CI by temporarily pinning ruff < 0.5.0", + "html_url": "https://github.com/huggingface/datasets/pull/7007", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7007/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7007.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7007', 'merged_at': '2024-06-28T05:25:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7007.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7007'}", + "is_pull_request": true + }, + { + "title": "CI is broken after ruff-0.5.0: E721", + "html_url": "https://github.com/huggingface/datasets/issues/7006", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7006/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "EmptyDatasetError: The directory at /metadata.jsonl doesn't contain any data files", + "html_url": "https://github.com/huggingface/datasets/issues/7005", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7005/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix WebDatasets KeyError for user-defined Features when a field is missing in an example", + "html_url": "https://github.com/huggingface/datasets/pull/7004", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7004/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7004.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7004', 'merged_at': '2024-06-28T09:30:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7004.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7004'}", + "is_pull_request": true + }, + { + "title": "minor fix for bfloat16", + "html_url": "https://github.com/huggingface/datasets/pull/7003", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7003/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7003.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7003', 'merged_at': '2024-06-25T16:10:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7003.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7003'}", + "is_pull_request": true + }, + { + "title": "Fix dump of bfloat16 torch tensor", + "html_url": "https://github.com/huggingface/datasets/pull/7002", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7002/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7002.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7002', 'merged_at': 
'2024-06-25T15:51:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7002.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7002'}", + "is_pull_request": true + }, + { + "title": "Datasetbuilder Local Download FileNotFoundError", + "html_url": "https://github.com/huggingface/datasets/issues/7001", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7001/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "IterableDataset: Unsupported ScalarType BFloat16", + "html_url": "https://github.com/huggingface/datasets/issues/7000", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7000/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove tasks", + "html_url": "https://github.com/huggingface/datasets/pull/6999", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6999/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6999.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6999', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6999.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6999'}", + "is_pull_request": true + }, + { + "title": "Fix tests using hf-internal-testing/librispeech_asr_dummy", + "html_url": "https://github.com/huggingface/datasets/pull/6998", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6998/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6998.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6998', 'merged_at': '2024-06-25T08:13:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6998.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6998'}", + "is_pull_request": true + }, + { + "title": "CI is broken for tests using hf-internal-testing/librispeech_asr_dummy", + "html_url": "https://github.com/huggingface/datasets/issues/6997", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6997/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove deprecated code", + "html_url": "https://github.com/huggingface/datasets/pull/6996", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6996/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6996.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6996', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6996.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6996'}", + "is_pull_request": true + }, + { + "title": "ImportError when importing datasets.load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6995", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6995/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, 
+ { + "title": "Fix incorrect rank value in data splitting", + "html_url": "https://github.com/huggingface/datasets/pull/6994", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6994/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6994.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6994', 'merged_at': '2024-06-25T16:19:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6994.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6994'}", + "is_pull_request": true + }, + { + "title": "less script docs", + "html_url": "https://github.com/huggingface/datasets/pull/6993", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6993/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6993.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6993', 'merged_at': '2024-06-27T09:31:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6993.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6993'}", + "is_pull_request": true + }, + { + "title": "Dataset with streaming doesn't work with proxy", + "html_url": "https://github.com/huggingface/datasets/issues/6992", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6992/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unblock NumPy 2.0", + "html_url": "https://github.com/huggingface/datasets/pull/6991", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6991/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6991.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6991', 'merged_at': '2024-07-12T12:04:53Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6991.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6991'}", + "is_pull_request": true + }, + { + "title": "Problematic rank after calling `split_dataset_by_node` twice", + "html_url": "https://github.com/huggingface/datasets/issues/6990", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6990/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "cache in nfs error", + "html_url": "https://github.com/huggingface/datasets/issues/6989", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6989/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[`feat`] Move dataset card creation to method for easier overriding", + "html_url": "https://github.com/huggingface/datasets/pull/6988", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6988/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6988.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6988', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6988.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6988'}", + "is_pull_request": true + }, + { + "title": "Remove beam", + "html_url": "https://github.com/huggingface/datasets/pull/6987", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6987/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6987.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6987', 'merged_at': '2024-06-26T19:35:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6987.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6987'}", + "is_pull_request": true + }, + { + "title": "Add large_list type support in string_to_arrow", + "html_url": "https://github.com/huggingface/datasets/pull/6986", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6986/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6986.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6986', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6986.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6986'}", + "is_pull_request": true + }, + { + "title": "AttributeError: module 'pyarrow.lib' has no attribute 'ListViewType'", + "html_url": "https://github.com/huggingface/datasets/issues/6985", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6985/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Convert polars DataFrame back to datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6984", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6984/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove metrics", + "html_url": "https://github.com/huggingface/datasets/pull/6983", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6983/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6983.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6983', 'merged_at': '2024-06-28T06:51:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6983.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6983'}", + "is_pull_request": true + }, + { + "title": "cannot split dataset when using load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6982", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6982/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Update docs on trust_remote_code defaults to False", + "html_url": "https://github.com/huggingface/datasets/pull/6981", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6981/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6981.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6981', 'merged_at': '2024-06-19T14:26:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6981.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6981'}", + "is_pull_request": true + }, + { + 
"title": "Support NumPy 2.0", + "html_url": "https://github.com/huggingface/datasets/issues/6980", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6980/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "How can I load partial parquet files only?", + "html_url": "https://github.com/huggingface/datasets/issues/6979", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6979/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix regression for pandas < 2.0.0 in JSON loader", + "html_url": "https://github.com/huggingface/datasets/pull/6978", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6978/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6978.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6978', 'merged_at': '2024-06-19T05:50:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6978.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6978'}", + "is_pull_request": true + }, + { + "title": "load json file error with v2.20.0", + "html_url": "https://github.com/huggingface/datasets/issues/6977", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6977/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Ensure compatibility with numpy 2.0.0", + "html_url": "https://github.com/huggingface/datasets/pull/6976", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6976/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6976.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6976', 'merged_at': '2024-06-19T14:04:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6976.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6976'}", + "is_pull_request": true + }, + { + "title": "Set temporary numpy upper version < 2.0.0 to fix CI", + "html_url": "https://github.com/huggingface/datasets/pull/6975", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6975/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6975.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6975', 'merged_at': '2024-06-17T12:43:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6975.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6975'}", + "is_pull_request": true + }, + { + "title": "IndexError during training with Squad dataset and T5-small model", + "html_url": "https://github.com/huggingface/datasets/issues/6973", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6973/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix webdataset pickling", + "html_url": "https://github.com/huggingface/datasets/pull/6972", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6972/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6972.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6972', 'merged_at': '2024-06-14T15:37:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6972.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6972'}", + "is_pull_request": true + }, + { + "title": "packaging: Remove useless dependencies", + "html_url": "https://github.com/huggingface/datasets/pull/6971", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6971/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6971.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6971', 'merged_at': '2024-06-14T13:57:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6971.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6971'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6970", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6970/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6970.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6970', 'merged_at': '2024-06-13T14:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6970.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6970'}", + "is_pull_request": true + }, + { + "title": "Release: 2.20.0", + "html_url": "https://github.com/huggingface/datasets/pull/6969", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6969/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6969.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6969', 'merged_at': '2024-06-13T14:55:53Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6969.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6969'}", + "is_pull_request": true + }, + { + "title": "Use `HF_HUB_OFFLINE` instead of `HF_DATASETS_OFFLINE`", + "html_url": "https://github.com/huggingface/datasets/pull/6968", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6968/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6968.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6968', 'merged_at': '2024-06-13T17:25:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6968.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6968'}", + "is_pull_request": true + }, + { + "title": "Method to load Laion400m", + "html_url": "https://github.com/huggingface/datasets/issues/6967", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6967/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove underlines between badges", + "html_url": 
"https://github.com/huggingface/datasets/pull/6966", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6966/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6966.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6966', 'merged_at': '2024-06-19T14:10:11Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6966.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6966'}", + "is_pull_request": true + }, + { + "title": "Improve skip take shuffling and distributed", + "html_url": "https://github.com/huggingface/datasets/pull/6965", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6965/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6965.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6965', 'merged_at': '2024-06-24T15:16:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6965.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6965'}", + "is_pull_request": true + }, + { + "title": "Fix resuming arrow format", + "html_url": "https://github.com/huggingface/datasets/pull/6964", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6964/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6964.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6964', 'merged_at': '2024-06-14T14:58:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6964.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6964'}", + "is_pull_request": true + }, + { + "title": "[Streaming] retry on requests errors", + "html_url": "https://github.com/huggingface/datasets/pull/6963", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6963/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6963.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6963', 'merged_at': '2024-06-28T09:46:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6963.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6963'}", + "is_pull_request": true + }, + { + "title": "fix(ci): remove unnecessary permissions", + "html_url": "https://github.com/huggingface/datasets/pull/6962", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6962/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6962.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6962', 'merged_at': '2024-06-11T08:25:47Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6962.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6962'}", + "is_pull_request": true + }, + { + "title": "Manual downloads should count as downloads", + "html_url": "https://github.com/huggingface/datasets/issues/6961", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6961/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", 
+ "is_pull_request": false + }, + { + "title": "feat(ci): add trufflehog secrets detection", + "html_url": "https://github.com/huggingface/datasets/pull/6960", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6960/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6960.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6960', 'merged_at': '2024-06-08T14:52:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6960.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6960'}", + "is_pull_request": true + }, + { + "title": "Better error handling in `dataset_module_factory`", + "html_url": "https://github.com/huggingface/datasets/pull/6959", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6959/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6959.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6959', 'merged_at': '2024-06-10T07:27:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6959.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6959'}", + "is_pull_request": true + }, + { + "title": "My Private Dataset doesn't exist on the Hub or cannot be accessed", + "html_url": "https://github.com/huggingface/datasets/issues/6958", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6958/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix typos in docs", + "html_url": "https://github.com/huggingface/datasets/pull/6957", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6957/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6957.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6957', 'merged_at': '2024-06-05T12:43:26Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6957.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6957'}", + "is_pull_request": true + }, + { + "title": "update docs on N-dim arrays", + "html_url": "https://github.com/huggingface/datasets/pull/6956", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6956/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6956.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6956', 'merged_at': '2024-06-04T16:40:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6956.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6956'}", + "is_pull_request": true + }, + { + "title": "Fix small typo", + "html_url": "https://github.com/huggingface/datasets/pull/6955", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6955/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6955.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6955', 'merged_at': '2024-06-04T15:20:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6955.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6955'}", + "is_pull_request": true + }, + { + "title": "Remove default `trust_remote_code=True`", + "html_url": 
"https://github.com/huggingface/datasets/pull/6954", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6954/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6954.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6954', 'merged_at': '2024-06-07T12:20:29Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6954.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6954'}", + "is_pull_request": true + }, + { + "title": "Remove canonical datasets from docs", + "html_url": "https://github.com/huggingface/datasets/issues/6953", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6953/comments", + "labels": "[{'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Move info_utils errors to exceptions module", + "html_url": "https://github.com/huggingface/datasets/pull/6952", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6952/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6952.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6952', 'merged_at': '2024-06-10T14:03:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6952.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6952'}", + "is_pull_request": true + }, + { + "title": "load_dataset() should load all subsets, if no specific subset is specified", + "html_url": "https://github.com/huggingface/datasets/issues/6951", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6951/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`Dataset.with_format` behaves inconsistently with documentation", + "html_url": "https://github.com/huggingface/datasets/issues/6950", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6950/comments", + "labels": "[{'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset error", + "html_url": "https://github.com/huggingface/datasets/issues/6949", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6949/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "to_tf_dataset: Visible devices cannot be modified after being initialized", + "html_url": "https://github.com/huggingface/datasets/issues/6948", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6948/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "FileNotFoundError\uff1aerror when loading C4 dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6947", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6947/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Re-enable import sorting disabled by flake8:noqa directive when using ruff linter", + "html_url": "https://github.com/huggingface/datasets/pull/6946", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6946/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6946.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6946', 'merged_at': '2024-06-04T09:54:23Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6946.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6946'}", + "is_pull_request": true + }, + { + "title": "Update yanked version of minimum requests requirement", + "html_url": "https://github.com/huggingface/datasets/pull/6945", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6945/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6945.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6945', 'merged_at': '2024-06-03T06:09:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6945.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6945'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6944", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6944/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6944.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6944', 'merged_at': '2024-06-03T05:31:46Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6944.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6944'}", + "is_pull_request": true + }, + { + "title": "Release 2.19.2", + "html_url": "https://github.com/huggingface/datasets/pull/6943", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6943/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6943.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6943', 'merged_at': '2024-06-03T05:17:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6943.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6943'}", + "is_pull_request": true + }, + { + "title": "Import sorting is disabled by flake8 noqa directive after switching to ruff linter", + "html_url": "https://github.com/huggingface/datasets/issues/6942", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6942/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Supporting FFCV: Fast Forward Computer Vision", + "html_url": 
"https://github.com/huggingface/datasets/issues/6941", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6941/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Enable Sharding to Equal Sized Shards", + "html_url": "https://github.com/huggingface/datasets/issues/6940", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6940/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ExpectedMoreSplits error when using data_dir", + "html_url": "https://github.com/huggingface/datasets/issues/6939", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6939/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix expected splits when passing data_files or dir", + "html_url": "https://github.com/huggingface/datasets/pull/6938", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6938/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6938.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6938', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6938.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6938'}", + "is_pull_request": true + }, + { + "title": "JSON loader implicitly coerces floats to integers", + "html_url": "https://github.com/huggingface/datasets/issues/6937", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6937/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "save_to_disk() freezes when saving on s3 bucket with multiprocessing", + "html_url": "https://github.com/huggingface/datasets/issues/6936", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6936/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support for pathlib.Path in datasets 2.19.0", + "html_url": "https://github.com/huggingface/datasets/issues/6935", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6935/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Revert ci user", + "html_url": "https://github.com/huggingface/datasets/pull/6934", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6934/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6934.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6934', 'merged_at': '2024-05-30T10:45:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6934.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6934'}", + "is_pull_request": true + }, + { + "title": "update ci user", + "html_url": "https://github.com/huggingface/datasets/pull/6933", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6933/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6933.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6933', 'merged_at': '2024-05-30T10:23:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6933.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6933'}", + "is_pull_request": true + }, + { + "title": "Update dataset_dict.py", + "html_url": "https://github.com/huggingface/datasets/pull/6932", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6932/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6932.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6932', 'merged_at': '2024-06-04T12:50:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6932.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6932'}", + "is_pull_request": true + }, + { + "title": "[WebDataset] Support compressed files", + "html_url": "https://github.com/huggingface/datasets/pull/6931", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6931/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6931.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6931', 'merged_at': '2024-05-29T16:24:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6931.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6931'}", + "is_pull_request": true + }, + { + "title": "ValueError: Couldn't infer the same data file format for all splits. 
Got {'train': ('json', {}), 'validation': (None, {})}", + "html_url": "https://github.com/huggingface/datasets/issues/6930", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6930/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Avoid downloading the whole dataset when only README.me has been touched on hub.", + "html_url": "https://github.com/huggingface/datasets/issues/6929", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6929/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Update process.mdx: Code Listings Fixes", + "html_url": "https://github.com/huggingface/datasets/pull/6928", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6928/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6928.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6928', 'merged_at': '2024-06-04T12:55:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6928.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6928'}", + "is_pull_request": true + }, + { + "title": "Update process.mdx: Minor Code Listings Updates and Fixes", + "html_url": "https://github.com/huggingface/datasets/pull/6927", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6927/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6927.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6927', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6927.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6927'}", + "is_pull_request": true + }, + { + "title": "Update process.mdx: Fix code listing in Shard section", + "html_url": "https://github.com/huggingface/datasets/pull/6926", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6926/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6926.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6926', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6926.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6926'}", + "is_pull_request": true + }, + { + "title": "Fix NonMatchingSplitsSizesError/ExpectedMoreSplits when passing data_dir/data_files in no-code Hub datasets", + "html_url": "https://github.com/huggingface/datasets/pull/6925", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6925/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6925.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6925', 'merged_at': '2024-05-31T17:10:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6925.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6925'}", + "is_pull_request": true + }, + { + "title": "Caching map result of DatasetDict.", + "html_url": 
"https://github.com/huggingface/datasets/issues/6924", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6924/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Export Parquet Tablet Audio-Set is null bytes in Arrow ", + "html_url": "https://github.com/huggingface/datasets/issues/6923", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6923/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove torchaudio remnants from code", + "html_url": "https://github.com/huggingface/datasets/pull/6922", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6922/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6922.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6922', 'merged_at': '2024-05-27T08:59:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6922.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6922'}", + "is_pull_request": true + }, + { + "title": "Support fsspec 2024.5.0", + "html_url": "https://github.com/huggingface/datasets/pull/6921", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6921/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6921.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6921', 'merged_at': '2024-05-27T08:01:08Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6921.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6921'}", + "is_pull_request": true + }, + { + "title": "[WebDataset] Add `.pth` support for torch tensors", + "html_url": "https://github.com/huggingface/datasets/pull/6920", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6920/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6920.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6920', 'merged_at': '2024-05-27T09:04:54Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6920.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6920'}", + "is_pull_request": true + }, + { + "title": "Invalid YAML in README.md: unknown tag !", + "html_url": "https://github.com/huggingface/datasets/issues/6919", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6919/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "NonMatchingSplitsSizesError when using data_dir", + "html_url": "https://github.com/huggingface/datasets/issues/6918", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6918/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "WinError 32 The process cannot access the file during load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6917", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6917/comments", + "labels": 
"[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "```push_to_hub()``` - Prevent Automatic Generation of Splits ", + "html_url": "https://github.com/huggingface/datasets/issues/6916", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6916/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Validate config name and data_files in packaged modules", + "html_url": "https://github.com/huggingface/datasets/pull/6915", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6915/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6915.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6915', 'merged_at': '2024-06-06T09:24:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6915.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6915'}", + "is_pull_request": true + }, + { + "title": "Preserve JSON column order and support list of strings field", + "html_url": "https://github.com/huggingface/datasets/pull/6914", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6914/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6914.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6914', 'merged_at': '2024-05-29T13:12:23Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6914.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6914'}", + "is_pull_request": true + }, + { + "title": "Column order is nondeterministic when loading from JSON", + "html_url": "https://github.com/huggingface/datasets/issues/6913", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6913/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add MedImg for streaming", + "html_url": "https://github.com/huggingface/datasets/issues/6912", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6912/comments", + "labels": "[{'color': 'e99695', 'default': False, 'description': 'Requesting to add a new dataset', 'id': 2067376369, 'name': 'dataset request', 'node_id': 'MDU6TGFiZWwyMDY3Mzc2MzY5', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/dataset%20request'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove dead code for non-dict data_files from packaged modules", + "html_url": "https://github.com/huggingface/datasets/pull/6911", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6911/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6911.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6911', 'merged_at': '2024-05-23T07:59:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6911.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6911'}", + "is_pull_request": true + }, + { + "title": "Fix wrong type hints in data_files", + "html_url": 
"https://github.com/huggingface/datasets/pull/6910", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6910/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6910.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6910', 'merged_at': '2024-05-23T05:58:05Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6910.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6910'}", + "is_pull_request": true + }, + { + "title": "Update requests >=2.32.1 to fix vulnerability", + "html_url": "https://github.com/huggingface/datasets/pull/6909", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6909/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6909.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6909', 'merged_at': '2024-05-21T07:38:25Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6909.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6909'}", + "is_pull_request": true + }, + { + "title": "Fail to load \"stas/c4-en-10k\" dataset since 2.16 version", + "html_url": "https://github.com/huggingface/datasets/issues/6908", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6908/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support the deserialization of json lines files comprised of lists", + "html_url": "https://github.com/huggingface/datasets/issues/6907", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6907/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "irc_disentangle - Issue with splitting data", + "html_url": "https://github.com/huggingface/datasets/issues/6906", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6906/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Extraction protocol for arrow files is not defined", + "html_url": "https://github.com/huggingface/datasets/issues/6905", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6905/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix decoding multi part extension", + "html_url": "https://github.com/huggingface/datasets/pull/6904", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6904/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6904.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6904', 'merged_at': '2024-05-17T14:46:54Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6904.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6904'}", + "is_pull_request": true + }, + { + "title": "Add the option of saving in parquet instead of arrow ", + "html_url": "https://github.com/huggingface/datasets/issues/6903", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6903/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Make CLI convert_to_parquet not raise error if no rights to create script branch", + "html_url": "https://github.com/huggingface/datasets/pull/6902", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6902/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6902.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6902', 'merged_at': '2024-05-16T12:51:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6902.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6902'}", + "is_pull_request": true + }, + { + "title": "HTTPError 403 raised by CLI convert_to_parquet when creating script branch on 3rd party repos", + "html_url": "https://github.com/huggingface/datasets/issues/6901", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6901/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[WebDataset] KeyError with user-defined `Features` when a field is missing in an example", + "html_url": "https://github.com/huggingface/datasets/issues/6900", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6900/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "List of dictionary features get standardized", + "html_url": "https://github.com/huggingface/datasets/issues/6899", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6899/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix YAML error in README files appearing on GitHub", + "html_url": "https://github.com/huggingface/datasets/pull/6898", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6898/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6898.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6898', 'merged_at': '2024-05-16T14:28:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6898.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6898'}", + "is_pull_request": true + }, + { + "title": "datasets template guide :: issue in documentation YAML", + "html_url": "https://github.com/huggingface/datasets/issues/6897", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6897/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Regression bug: `NonMatchingSplitsSizesError` for (possibly) overwritten dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6896", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6896/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Document that to_json defaults to JSON Lines", + "html_url": "https://github.com/huggingface/datasets/pull/6895", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6895/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6895.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6895', 'merged_at': '2024-05-16T14:31:26Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6895.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6895'}", + "is_pull_request": true + }, + { + "title": "Better document defaults of to_json", + "html_url": "https://github.com/huggingface/datasets/issues/6894", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6894/comments", + "labels": "[{'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Close gzipped files properly", + "html_url": "https://github.com/huggingface/datasets/pull/6893", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6893/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6893.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6893', 'merged_at': '2024-05-13T13:01:54Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6893.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6893'}", + "is_pull_request": true + }, + { + "title": "Add support for categorical/dictionary types", + "html_url": "https://github.com/huggingface/datasets/pull/6892", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6892/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6892.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6892', 'merged_at': '2024-06-07T12:20:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6892.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6892'}", + "is_pull_request": true + }, + { + "title": "Unable to load JSON saved using `to_json`", + "html_url": "https://github.com/huggingface/datasets/issues/6891", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6891/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add `with_transform` and/or `set_transform` to IterableDataset", + "html_url": "https://github.com/huggingface/datasets/issues/6890", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6890/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fix bug #6877", + "html_url": 
"https://github.com/huggingface/datasets/pull/6889", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6889/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6889.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6889', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6889.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6889'}", + "is_pull_request": true + }, + { + "title": "Support WebDataset containing file basenames with dots", + "html_url": "https://github.com/huggingface/datasets/pull/6888", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6888/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6888.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6888', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6888.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6888'}", + "is_pull_request": true + }, + { + "title": "FAISS load to None", + "html_url": "https://github.com/huggingface/datasets/issues/6887", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6887/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset with data_dir and cache_dir set fail with not supported", + "html_url": "https://github.com/huggingface/datasets/issues/6886", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6886/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support jax 0.4.27 in CI tests", + "html_url": "https://github.com/huggingface/datasets/pull/6885", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6885/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6885.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6885', 'merged_at': '2024-05-08T09:35:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6885.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6885'}", + "is_pull_request": true + }, + { + "title": "CI is broken after jax-0.4.27 release: AttributeError: 'jaxlib.xla_extension.DeviceList' object has no attribute 'device'", + "html_url": "https://github.com/huggingface/datasets/issues/6884", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6884/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Require Pillow >= 9.4.0 to avoid AttributeError when loading image dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6883", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6883/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6883.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6883', 'merged_at': '2024-05-16T14:34:02Z', 'patch_url': 
'https://github.com/huggingface/datasets/pull/6883.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6883'}", + "is_pull_request": true + }, + { + "title": "Connection Error When Using By-pass Proxies", + "html_url": "https://github.com/huggingface/datasets/issues/6882", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6882/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "AttributeError: module 'PIL.Image' has no attribute 'ExifTags'", + "html_url": "https://github.com/huggingface/datasets/issues/6881", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6881/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Webdataset: KeyError: 'png' on some datasets when streaming", + "html_url": "https://github.com/huggingface/datasets/issues/6880", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6880/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Batched mapping does not raise an error if values for an existing column are empty", + "html_url": "https://github.com/huggingface/datasets/issues/6879", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6879/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Create function to convert to parquet", + "html_url": "https://github.com/huggingface/datasets/pull/6878", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6878/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6878.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6878', 'merged_at': '2024-05-16T14:38:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6878.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6878'}", + "is_pull_request": true + }, + { + "title": "OSError: [Errno 24] Too many open files", + "html_url": "https://github.com/huggingface/datasets/issues/6877", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6877/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unpin hfh", + "html_url": "https://github.com/huggingface/datasets/pull/6876", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6876/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6876.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6876', 'merged_at': '2024-05-27T10:14:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6876.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6876'}", + "is_pull_request": true + }, + { + "title": "Shorten long logs", + "html_url": 
"https://github.com/huggingface/datasets/pull/6875", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6875/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6875.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6875', 'merged_at': '2024-05-07T12:25:45Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6875.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6875'}", + "is_pull_request": true + }, + { + "title": "Use pandas ujson in JSON loader to improve performance", + "html_url": "https://github.com/huggingface/datasets/pull/6874", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6874/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6874.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6874', 'merged_at': '2024-05-17T16:22:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6874.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6874'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6873", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6873/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6873.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6873', 'merged_at': '2024-05-06T09:57:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6873.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6873'}", + "is_pull_request": true + }, + { + "title": "Release 2.19.1", + "html_url": "https://github.com/huggingface/datasets/pull/6872", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6872/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6872.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6872', 'merged_at': '2024-05-06T09:35:32Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6872.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6872'}", + "is_pull_request": true + }, + { + "title": "Fix download for dict of dicts of URLs", + "html_url": "https://github.com/huggingface/datasets/pull/6871", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6871/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6871.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6871', 'merged_at': '2024-05-06T09:25:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6871.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6871'}", + "is_pull_request": true + }, + { + "title": "Update tqdm >= 4.66.3 to fix vulnerability", + "html_url": "https://github.com/huggingface/datasets/pull/6870", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6870/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6870.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6870', 'merged_at': '2024-05-06T06:02:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6870.patch', 'url': 
'https://api.github.com/repos/huggingface/datasets/pulls/6870'}", + "is_pull_request": true + }, + { + "title": "Download is broken for dict of dicts: FileNotFoundError", + "html_url": "https://github.com/huggingface/datasets/issues/6869", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6869/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "datasets.BuilderConfig does not work.", + "html_url": "https://github.com/huggingface/datasets/issues/6868", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6868/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Improve performance of JSON loader", + "html_url": "https://github.com/huggingface/datasets/issues/6867", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6867/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "DataFilesNotFoundError for datasets in the open-llm-leaderboard", + "html_url": "https://github.com/huggingface/datasets/issues/6866", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6866/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Example on Semantic segmentation contains bug", + "html_url": "https://github.com/huggingface/datasets/issues/6865", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6865/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Dataset 'rewardsignal/reddit_writing_prompts' doesn't exist on the Hub", + "html_url": "https://github.com/huggingface/datasets/issues/6864", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6864/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Revert temporary pin huggingface-hub < 0.23.0", + "html_url": "https://github.com/huggingface/datasets/issues/6863", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6863/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Issue 6598: load_dataset broken for data_files on s3", + "html_url": "https://github.com/huggingface/datasets/pull/6862", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6862/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6862.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6862', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6862.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6862'}", + "is_pull_request": true + }, + { + "title": "Fix CI by temporarily pinning huggingface-hub < 0.23.0", + "html_url": 
"https://github.com/huggingface/datasets/pull/6861", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6861/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6861.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6861', 'merged_at': '2024-05-02T16:53:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6861.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6861'}", + "is_pull_request": true + }, + { + "title": "CI fails after huggingface_hub-0.23.0 release: FutureWarning: \"resume_download\"", + "html_url": "https://github.com/huggingface/datasets/issues/6860", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6860/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support folder-based datasets with large metadata.jsonl", + "html_url": "https://github.com/huggingface/datasets/pull/6859", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6859/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6859.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6859', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6859.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6859'}", + "is_pull_request": true + }, + { + "title": "Segmentation fault", + "html_url": "https://github.com/huggingface/datasets/issues/6858", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6858/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix line-endings in tests on Windows", + "html_url": "https://github.com/huggingface/datasets/pull/6857", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6857/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6857.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6857', 'merged_at': '2024-05-02T11:43:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6857.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6857'}", + "is_pull_request": true + }, + { + "title": "CI fails on Windows for test_delete_from_hub and test_xgetsize_private due to new-line character", + "html_url": "https://github.com/huggingface/datasets/issues/6856", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6856/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix dataset name for community Hub script-datasets", + "html_url": "https://github.com/huggingface/datasets/pull/6855", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6855/comments", + "labels": "[]", + "state": "closed", + "pull_request": 
"{'diff_url': 'https://github.com/huggingface/datasets/pull/6855.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6855', 'merged_at': '2024-05-03T15:51:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6855.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6855'}", + "is_pull_request": true + }, + { + "title": "Wrong example of usage when config name is missing for community script-datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6854", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6854/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support soft links for load_datasets imagefolder", + "html_url": "https://github.com/huggingface/datasets/issues/6853", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6853/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Write token isn't working while pushing to datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6852", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6852/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset('emotion') UnicodeDecodeError", + "html_url": "https://github.com/huggingface/datasets/issues/6851", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6851/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Problem loading voxpopuli dataset ", + "html_url": "https://github.com/huggingface/datasets/issues/6850", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6850/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fix webdataset filename split", + "html_url": "https://github.com/huggingface/datasets/pull/6849", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6849/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6849.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6849', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6849.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6849'}", + "is_pull_request": true + }, + { + "title": "Cant Downlaod Common Voice 17.0 hy-AM ", + "html_url": "https://github.com/huggingface/datasets/issues/6848", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6848/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[Streaming] Only load requested splits without resolving files for the other splits", + "html_url": "https://github.com/huggingface/datasets/issues/6847", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6847/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unimaginable super slow iteration", + "html_url": "https://github.com/huggingface/datasets/issues/6846", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6846/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset doesn't support list column", + "html_url": "https://github.com/huggingface/datasets/issues/6845", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6845/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Retry on HF Hub error when streaming", + "html_url": "https://github.com/huggingface/datasets/pull/6844", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6844/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6844.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6844', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6844.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6844'}", + "is_pull_request": true + }, + { + "title": "IterableDataset raises exception instead of retrying", + "html_url": "https://github.com/huggingface/datasets/issues/6843", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6843/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Datasets with files with colon : in filenames cannot be used on Windows", + "html_url": "https://github.com/huggingface/datasets/issues/6842", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6842/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unable to load wiki_auto_asset_turk from GEM", + "html_url": "https://github.com/huggingface/datasets/issues/6841", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6841/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Delete uploaded files from the UI", + "html_url": "https://github.com/huggingface/datasets/issues/6840", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6840/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove token arg from CLI examples", + "html_url": "https://github.com/huggingface/datasets/pull/6839", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6839/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6839.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6839', 'merged_at': '2024-04-26T16:57:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6839.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6839'}", + "is_pull_request": true + }, + { + 
"title": "Remove token arg from CLI examples", + "html_url": "https://github.com/huggingface/datasets/issues/6838", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6838/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Cannot use cached dataset without Internet connection (or when servers are down)", + "html_url": "https://github.com/huggingface/datasets/issues/6837", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6837/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ExpectedMoreSplits error on load_dataset when upgrading to 2.19.0", + "html_url": "https://github.com/huggingface/datasets/issues/6836", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6836/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support pyarrow LargeListType", + "html_url": "https://github.com/huggingface/datasets/pull/6835", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6835/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6835.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6835', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6835.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6835'}", + "is_pull_request": true + }, + { + "title": "largelisttype not supported (.from_polars())", + "html_url": "https://github.com/huggingface/datasets/issues/6834", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6834/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Super slow iteration with trivial custom transform", + "html_url": "https://github.com/huggingface/datasets/issues/6833", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6833/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support downloading specific splits in `load_dataset`", + "html_url": "https://github.com/huggingface/datasets/pull/6832", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6832/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6832.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6832', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6832.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6832'}", + "is_pull_request": true + }, + { + "title": "Add docs about the CLI", + "html_url": "https://github.com/huggingface/datasets/pull/6831", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6831/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6831.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6831', 'merged_at': '2024-04-25T10:44:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6831.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6831'}", + "is_pull_request": true + }, + { + "title": "Add a doc page for the convert_to_parquet CLI", + "html_url": 
"https://github.com/huggingface/datasets/issues/6830", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6830/comments", + "labels": "[{'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Load and save from/to disk no longer accept pathlib.Path", + "html_url": "https://github.com/huggingface/datasets/issues/6829", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6829/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support PathLike input in save_to_disk / load_from_disk", + "html_url": "https://github.com/huggingface/datasets/pull/6828", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6828/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6828.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6828', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6828.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6828'}", + "is_pull_request": true + }, + { + "title": "Loading a remote dataset fails in the last release (v2.19.0)", + "html_url": "https://github.com/huggingface/datasets/issues/6827", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6827/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6826", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6826/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6826.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6826', 'merged_at': '2024-04-19T08:52:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6826.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6826'}", + "is_pull_request": true + }, + { + "title": "Release: 2.19.0", + "html_url": "https://github.com/huggingface/datasets/pull/6825", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6825/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6825.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6825', 'merged_at': '2024-04-19T08:44:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6825.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6825'}", + "is_pull_request": true + }, + { + "title": "Winogrande does not seem to be compatible with datasets version of 1.18.0", + "html_url": "https://github.com/huggingface/datasets/issues/6824", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6824/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false 
+ }, + { + "title": "Loading problems of Datasets with a single shard", + "html_url": "https://github.com/huggingface/datasets/issues/6823", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6823/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix parquet export infos", + "html_url": "https://github.com/huggingface/datasets/pull/6822", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6822/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6822.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6822', 'merged_at': '2024-04-18T11:09:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6822.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6822'}", + "is_pull_request": true + }, + { + "title": "Allow deleting a subset/config from a no-script dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6820", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6820/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6820.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6820', 'merged_at': '2024-04-30T09:44:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6820.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6820'}", + "is_pull_request": true + }, + { + "title": "Give more details in `DataFilesNotFoundError` when getting the config names", + "html_url": "https://github.com/huggingface/datasets/issues/6819", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6819/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support indexable objects in `Dataset.__getitem__`", + "html_url": "https://github.com/huggingface/datasets/pull/6817", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6817/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6817.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6817', 'merged_at': '2024-04-16T18:17:29Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6817.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6817'}", + "is_pull_request": true + }, + { + "title": "Improve typing of Dataset.search, matching definition", + "html_url": "https://github.com/huggingface/datasets/pull/6816", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6816/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6816.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6816', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6816.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6816'}", + "is_pull_request": true + }, + { + "title": "Remove `os.path.relpath` in `resolve_patterns`", + "html_url": "https://github.com/huggingface/datasets/pull/6815", + 
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6815/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6815.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6815', 'merged_at': '2024-04-16T15:58:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6815.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6815'}", + "is_pull_request": true + }, + { + "title": "`map` with `num_proc` > 1 leads to OOM", + "html_url": "https://github.com/huggingface/datasets/issues/6814", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6814/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add Dataset.take and Dataset.skip", + "html_url": "https://github.com/huggingface/datasets/pull/6813", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6813/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6813.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6813', 'merged_at': '2024-04-16T14:06:07Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6813.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6813'}", + "is_pull_request": true + }, + { + "title": "Run CI", + "html_url": "https://github.com/huggingface/datasets/pull/6812", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6812/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6812.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6812', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6812.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6812'}", + "is_pull_request": true + }, + { + "title": "add allow_primitive_to_str and allow_decimal_to_str instead of allow_number_to_str", + "html_url": "https://github.com/huggingface/datasets/pull/6811", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6811/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6811.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6811', 'merged_at': '2024-04-16T17:03:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6811.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6811'}", + "is_pull_request": true + }, + { + "title": "Allow deleting a subset/config from a no-script dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6810", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6810/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Make convert_to_parquet CLI command create script branch", + "html_url": "https://github.com/huggingface/datasets/pull/6809", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6809/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 
'https://github.com/huggingface/datasets/pull/6809.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6809', 'merged_at': '2024-04-17T08:38:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6809.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6809'}", + "is_pull_request": true + }, + { + "title": "Make convert_to_parquet CLI command create script branch", + "html_url": "https://github.com/huggingface/datasets/issues/6808", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6808/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix hf-internal-testing/dataset_with_script commit SHA in CI test", + "html_url": "https://github.com/huggingface/datasets/pull/6806", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6806/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6806.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6806', 'merged_at': '2024-04-12T09:02:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6806.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6806'}", + "is_pull_request": true + }, + { + "title": "Batched mapping of existing string column casts boolean to string", + "html_url": "https://github.com/huggingface/datasets/issues/6805", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6805/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix --repo-type order in cli upload docs", + "html_url": "https://github.com/huggingface/datasets/pull/6804", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6804/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6804.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6804', 'merged_at': '2024-04-11T16:18:47Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6804.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6804'}", + "is_pull_request": true + }, + { + "title": "#6791 Improve type checking around FAISS", + "html_url": "https://github.com/huggingface/datasets/pull/6803", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6803/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6803.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6803', 'merged_at': '2024-04-11T15:38:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6803.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6803'}", + "is_pull_request": true + }, + { + "title": "Fix typo in docs (upload CLI)", + "html_url": "https://github.com/huggingface/datasets/pull/6802", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6802/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6802.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6802', 
'merged_at': '2024-04-11T13:19:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6802.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6802'}", + "is_pull_request": true + }, + { + "title": "got fileNotFound", + "html_url": "https://github.com/huggingface/datasets/issues/6801", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6801/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "High overhead when loading lots of subsets from the same dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6800", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6800/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fix `DatasetBuilder._split_generators` incomplete type annotation", + "html_url": "https://github.com/huggingface/datasets/pull/6799", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6799/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6799.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6799', 'merged_at': '2024-04-11T15:34:58Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6799.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6799'}", + "is_pull_request": true + }, + { + "title": "`DatasetBuilder._split_generators` incomplete type annotation", + "html_url": "https://github.com/huggingface/datasets/issues/6798", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6798/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix CI test_load_dataset_distributed_with_script", + "html_url": "https://github.com/huggingface/datasets/pull/6797", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6797/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6797.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6797', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6797.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6797'}", + "is_pull_request": true + }, + { + "title": "CI is broken due to hf-internal-testing/dataset_with_script", + "html_url": "https://github.com/huggingface/datasets/issues/6796", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6796/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add CLI function to convert script-dataset to Parquet", + "html_url": "https://github.com/huggingface/datasets/pull/6795", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6795/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6795.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6795', 'merged_at': '2024-04-12T15:27:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6795.patch', 'url': 
'https://api.github.com/repos/huggingface/datasets/pulls/6795'}", + "is_pull_request": true + }, + { + "title": "Multithreaded downloads", + "html_url": "https://github.com/huggingface/datasets/pull/6794", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6794/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6794.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6794', 'merged_at': '2024-04-15T21:18:08Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6794.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6794'}", + "is_pull_request": true + }, + { + "title": "Loading just one particular split is not possible for imagenet-1k", + "html_url": "https://github.com/huggingface/datasets/issues/6793", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6793/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix cache conflict in `_check_legacy_cache2`", + "html_url": "https://github.com/huggingface/datasets/pull/6792", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6792/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6792.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6792', 'merged_at': '2024-04-09T11:27:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6792.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6792'}", + "is_pull_request": true + }, + { + "title": "`add_faiss_index` raises ValueError: not enough values to unpack (expected 2, got 1)", + "html_url": "https://github.com/huggingface/datasets/issues/6791", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6791/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "PyArrow 'Memory mapping file failed: Cannot allocate memory' bug", + "html_url": "https://github.com/huggingface/datasets/issues/6790", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6790/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Issue with map", + "html_url": "https://github.com/huggingface/datasets/issues/6789", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6789/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "A Question About the Map Function", + "html_url": "https://github.com/huggingface/datasets/issues/6788", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6788/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "TimeoutError in map", + "html_url": "https://github.com/huggingface/datasets/issues/6787", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6787/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Make Image cast storage faster", + "html_url": "https://github.com/huggingface/datasets/pull/6786", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6786/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 
'https://github.com/huggingface/datasets/pull/6786.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6786', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6786.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6786'}", + "is_pull_request": true + }, + { + "title": "rename datasets-server to dataset-viewer", + "html_url": "https://github.com/huggingface/datasets/pull/6785", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6785/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6785.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6785', 'merged_at': '2024-04-08T12:35:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6785.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6785'}", + "is_pull_request": true + }, + { + "title": "Extract data on the fly in packaged builders", + "html_url": "https://github.com/huggingface/datasets/pull/6784", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6784/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6784.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6784', 'merged_at': '2024-04-16T16:31:29Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6784.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6784'}", + "is_pull_request": true + }, + { + "title": "AttributeError: module 'numpy' has no attribute 'object'. in Kaggle Notebook", + "html_url": "https://github.com/huggingface/datasets/issues/6783", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6783/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Image cast_storage very slow for arrays (e.g. 
numpy, tensors)", + "html_url": "https://github.com/huggingface/datasets/issues/6782", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6782/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove get_inferred_type from ArrowWriter write_batch", + "html_url": "https://github.com/huggingface/datasets/pull/6781", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6781/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6781.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6781', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6781.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6781'}", + "is_pull_request": true + }, + { + "title": "Fix CI", + "html_url": "https://github.com/huggingface/datasets/pull/6780", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6780/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6780.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6780', 'merged_at': '2024-04-04T18:23:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6780.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6780'}", + "is_pull_request": true + }, + { + "title": "Install dependencies with `uv` in CI", + "html_url": "https://github.com/huggingface/datasets/pull/6779", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6779/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6779.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6779', 'merged_at': '2024-04-08T13:27:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6779.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6779'}", + "is_pull_request": true + }, + { + "title": "Dataset.to_csv() missing commas in columns with lists", + "html_url": "https://github.com/huggingface/datasets/issues/6778", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6778/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": ".Jsonl metadata not detected", + "html_url": "https://github.com/huggingface/datasets/issues/6777", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6777/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "IndexError: Invalid key: 0 is out of bounds for size 0", + "html_url": "https://github.com/huggingface/datasets/issues/6775", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6775/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": " Generating split is very slow when Image format is PNG", + "html_url": "https://github.com/huggingface/datasets/issues/6774", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6774/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Dataset on Hub re-downloads every time?", + "html_url": "https://github.com/huggingface/datasets/issues/6773", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6773/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`remove_columns`/`rename_columns` doc fixes", + "html_url": "https://github.com/huggingface/datasets/pull/6772", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6772/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6772.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6772', 'merged_at': '2024-04-02T16:17:46Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6772.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6772'}", + "is_pull_request": true + }, + { + "title": "Datasets FileNotFoundError when trying to generate examples.", + "html_url": "https://github.com/huggingface/datasets/issues/6771", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6771/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[Bug Report] `datasets==2.18.0` is not compatible with `fsspec==2023.12.2`", + "html_url": "https://github.com/huggingface/datasets/issues/6770", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6770/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "(Willing to PR) Datasets with custom python objects", + "html_url": "https://github.com/huggingface/datasets/issues/6769", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6769/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fixing the issue 6755(small typo)", + "html_url": "https://github.com/huggingface/datasets/pull/6767", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6767/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6767.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6767', 'merged_at': '2024-04-02T14:01:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6767.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6767'}", + "is_pull_request": true + }, + { + "title": "Compatibility issue between s3fs, fsspec, and datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6765", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6765/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset can't work with symbolic links", + "html_url": "https://github.com/huggingface/datasets/issues/6764", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6764/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + 
"title": "Fix issue with case sensitivity when loading dataset from local cache", + "html_url": "https://github.com/huggingface/datasets/pull/6763", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6763/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6763.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6763', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6763.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6763'}", + "is_pull_request": true + }, + { + "title": "Allow polars as valid output type", + "html_url": "https://github.com/huggingface/datasets/pull/6762", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6762/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6762.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6762', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6762.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6762'}", + "is_pull_request": true + }, + { + "title": "Remove deprecated code", + "html_url": "https://github.com/huggingface/datasets/pull/6761", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6761/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6761.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6761', 'merged_at': '2024-03-29T13:18:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6761.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6761'}", + "is_pull_request": true + }, + { + "title": "Load codeparrot/apps raising UnicodeDecodeError in datasets-2.18.0", + "html_url": "https://github.com/huggingface/datasets/issues/6760", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6760/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Persistent multi-process Pool", + "html_url": "https://github.com/huggingface/datasets/issues/6759", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6759/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Passing `sample_by` to `load_dataset` when loading text data does not work", + "html_url": "https://github.com/huggingface/datasets/issues/6758", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6758/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Test disabling transformers containers in docs CI", + "html_url": "https://github.com/huggingface/datasets/pull/6757", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6757/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6757.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6757', 'merged_at': None, 'patch_url': 
'https://github.com/huggingface/datasets/pull/6757.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6757'}", + "is_pull_request": true + }, + { + "title": "Support SQLite files?", + "html_url": "https://github.com/huggingface/datasets/issues/6756", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6756/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Small typo on the documentation", + "html_url": "https://github.com/huggingface/datasets/issues/6755", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6755/comments", + "labels": "[{'color': '7057ff', 'default': True, 'description': 'Good for newcomers', 'id': 1935892877, 'name': 'good first issue', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODc3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/good%20first%20issue'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix cache path to snakecase for `CachedDatasetModuleFactory` and `Cache`", + "html_url": "https://github.com/huggingface/datasets/pull/6754", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6754/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6754.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6754', 'merged_at': '2024-04-15T15:38:51Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6754.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6754'}", + "is_pull_request": true + }, + { + "title": "Type error when importing datasets on Kaggle", + "html_url": "https://github.com/huggingface/datasets/issues/6753", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6753/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Precision being changed from float16 to float32 unexpectedly", + "html_url": "https://github.com/huggingface/datasets/issues/6752", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6752/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Use 'with' operator for some download functions", + "html_url": "https://github.com/huggingface/datasets/pull/6751", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6751/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6751.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6751', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6751.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6751'}", + "is_pull_request": true + }, + { + "title": "`load_dataset` requires a network connection for local download?", + "html_url": "https://github.com/huggingface/datasets/issues/6750", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6750/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix fsspec tqdm 
callback", + "html_url": "https://github.com/huggingface/datasets/pull/6749", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6749/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6749.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6749', 'merged_at': '2024-03-22T14:45:39Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6749.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6749'}", + "is_pull_request": true + }, + { + "title": "Strange slicing behavior", + "html_url": "https://github.com/huggingface/datasets/issues/6748", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6748/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "chore(deps): bump fsspec", + "html_url": "https://github.com/huggingface/datasets/pull/6747", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6747/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6747.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6747', 'merged_at': '2024-03-22T16:28:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6747.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6747'}", + "is_pull_request": true + }, + { + "title": "ExpectedMoreSplits error when loading C4 dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6746", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6746/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Scraping the whole of github including private repos is bad; kindly stop", + "html_url": "https://github.com/huggingface/datasets/issues/6745", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6745/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Option to disable file locking", + "html_url": "https://github.com/huggingface/datasets/issues/6744", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6744/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow null values in dict columns", + "html_url": "https://github.com/huggingface/datasets/pull/6743", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6743/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6743.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6743', 'merged_at': '2024-03-19T20:05:19Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6743.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6743'}", + "is_pull_request": true + }, + { + 
"title": "Fix missing download_config in get_data_patterns", + "html_url": "https://github.com/huggingface/datasets/pull/6742", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6742/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6742.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6742', 'merged_at': '2024-03-19T18:15:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6742.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6742'}", + "is_pull_request": true + }, + { + "title": "Fix offline mode with single config", + "html_url": "https://github.com/huggingface/datasets/pull/6741", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6741/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6741.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6741', 'merged_at': '2024-03-25T16:23:59Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6741.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6741'}", + "is_pull_request": true + }, + { + "title": "Support for loading geotiff files as a part of the ImageFolder", + "html_url": "https://github.com/huggingface/datasets/issues/6740", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6740/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Transpose images with EXIF Orientation tag", + "html_url": "https://github.com/huggingface/datasets/pull/6739", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6739/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6739.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6739', 'merged_at': '2024-03-19T15:29:41Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6739.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6739'}", + "is_pull_request": true + }, + { + "title": "Dict feature is non-nullable while nested dict feature is", + "html_url": "https://github.com/huggingface/datasets/issues/6738", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6738/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Invalid pattern: '**' can only be an entire path component", + "html_url": "https://github.com/huggingface/datasets/issues/6737", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6737/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Mosaic Streaming (MDS) Support", + "html_url": "https://github.com/huggingface/datasets/issues/6736", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6736/comments", + 
"labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add `mode` parameter to `Image` feature", + "html_url": "https://github.com/huggingface/datasets/pull/6735", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6735/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6735.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6735', 'merged_at': '2024-03-18T15:41:33Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6735.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6735'}", + "is_pull_request": true + }, + { + "title": "Tokenization slows towards end of dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6734", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6734/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "EmptyDatasetError when loading dataset downloaded with HuggingFace cli", + "html_url": "https://github.com/huggingface/datasets/issues/6733", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6733/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unexpected behavior when using load_dataset with streaming=True in a for loop", + "html_url": "https://github.com/huggingface/datasets/issues/6731", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6731/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Deprecate Pandas builder", + "html_url": "https://github.com/huggingface/datasets/pull/6730", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6730/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6730.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6730', 'merged_at': '2024-03-12T17:36:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6730.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6730'}", + "is_pull_request": true + }, + { + "title": "Support zipfiles that span multiple disks?", + "html_url": "https://github.com/huggingface/datasets/issues/6729", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6729/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}\n {'color': 'd876e3', 'default': True, 'description': 'Further information is requested', 'id': 1935892912, 'name': 'question', 'node_id': 'MDU6TGFiZWwxOTM1ODkyOTEy', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/question'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Issue Downloading Certain Datasets After Setting Custom `HF_ENDPOINT`", + "html_url": "https://github.com/huggingface/datasets/issues/6728", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6728/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Using a registry instead of calling globals for fetching feature types", + "html_url": "https://github.com/huggingface/datasets/pull/6727", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6727/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6727.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6727', 'merged_at': '2024-03-13T10:46:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6727.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6727'}", + "is_pull_request": true + }, + { + "title": "Profiling for HF Filesystem shows there are easy performance gains to be made", + "html_url": "https://github.com/huggingface/datasets/issues/6726", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6726/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Request for a comparison of huggingface datasets compared with other data format especially webdataset", + "html_url": "https://github.com/huggingface/datasets/issues/6725", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6725/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Dataset with loading script does not work in renamed repos", + "html_url": "https://github.com/huggingface/datasets/issues/6724", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6724/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "get_dataset_default_config_name docstring", + "html_url": "https://github.com/huggingface/datasets/pull/6723", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6723/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6723.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6723', 'merged_at': '2024-03-07T17:21:20Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6723.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6723'}", + "is_pull_request": true + }, + { + "title": "Add details in docstring", + "html_url": "https://github.com/huggingface/datasets/pull/6722", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6722/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6722.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6722', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6722.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6722'}", + "is_pull_request": true + }, + { + "title": "Hi,do you know how to load the dataset from local file now?", + "html_url": "https://github.com/huggingface/datasets/issues/6721", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6721/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "TypeError: 'str' object is not callable", + "html_url": "https://github.com/huggingface/datasets/issues/6720", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6720/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Is there any way to solve hanging of IterableDataset using split by node + filtering during inference", + "html_url": "https://github.com/huggingface/datasets/issues/6719", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6719/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix concurrent script loading with force_redownload", + "html_url": "https://github.com/huggingface/datasets/pull/6718", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6718/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6718.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6718', 'merged_at': '2024-03-07T13:58:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6718.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6718'}", + "is_pull_request": true + }, + { + "title": "`remove_columns` method used with a streaming enable dataset mode produces a LibsndfileError on multichannel audio", + "html_url": "https://github.com/huggingface/datasets/issues/6717", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6717/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Non-deterministic `Dataset.builder_name` value", + "html_url": "https://github.com/huggingface/datasets/issues/6716", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6716/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix sliced ConcatenationTable pickling with mixed schemas vertically", + "html_url": "https://github.com/huggingface/datasets/pull/6715", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6715/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6715.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6715', 'merged_at': '2024-03-05T11:17:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6715.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6715'}", + "is_pull_request": true + }, + { + "title": "Expand no-code dataset info with datasets-server info", + "html_url": "https://github.com/huggingface/datasets/pull/6714", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6714/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6714.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6714', 'merged_at': '2024-03-04T20:22:15Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6714.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6714'}", + "is_pull_request": true + }, + { + "title": "Bump huggingface-hub lower version to 
0.21.2", + "html_url": "https://github.com/huggingface/datasets/pull/6713", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6713/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6713.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6713', 'merged_at': '2024-03-04T18:06:05Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6713.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6713'}", + "is_pull_request": true + }, + { + "title": "fix CastError pickling", + "html_url": "https://github.com/huggingface/datasets/pull/6712", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6712/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6712.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6712', 'merged_at': '2024-03-04T20:17:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6712.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6712'}", + "is_pull_request": true + }, + { + "title": "3x Faster Text Preprocessing", + "html_url": "https://github.com/huggingface/datasets/pull/6711", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6711/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6711.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6711', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6711.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6711'}", + "is_pull_request": true + }, + { + "title": "Persist IterableDataset epoch in workers", + "html_url": "https://github.com/huggingface/datasets/pull/6710", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6710/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6710.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6710', 'merged_at': '2024-07-01T17:45:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6710.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6710'}", + "is_pull_request": true + }, + { + "title": "set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6709", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6709/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6709.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6709', 'merged_at': '2024-03-01T21:01:23Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6709.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6709'}", + "is_pull_request": true + }, + { + "title": "Release: 2.18.0", + "html_url": "https://github.com/huggingface/datasets/pull/6708", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6708/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6708.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6708', 'merged_at': '2024-03-01T20:56:50Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6708.patch', 'url': 
'https://api.github.com/repos/huggingface/datasets/pulls/6708'}", + "is_pull_request": true + }, + { + "title": "Silence ruff deprecation messages", + "html_url": "https://github.com/huggingface/datasets/pull/6707", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6707/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6707.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6707', 'merged_at': '2024-03-01T17:25:46Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6707.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6707'}", + "is_pull_request": true + }, + { + "title": "Update ruff", + "html_url": "https://github.com/huggingface/datasets/pull/6706", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6706/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6706.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6706', 'merged_at': '2024-03-01T16:52:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6706.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6706'}", + "is_pull_request": true + }, + { + "title": "Fix data_files when passing data_dir", + "html_url": "https://github.com/huggingface/datasets/pull/6705", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6705/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6705.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6705', 'merged_at': '2024-03-01T18:52:49Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6705.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6705'}", + "is_pull_request": true + }, + { + "title": "Improve default patterns resolution", + "html_url": "https://github.com/huggingface/datasets/pull/6704", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6704/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6704.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6704', 'merged_at': '2024-03-15T15:22:03Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6704.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6704'}", + "is_pull_request": true + }, + { + "title": "Unable to load dataset that was saved with `save_to_disk`", + "html_url": "https://github.com/huggingface/datasets/issues/6703", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6703/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Push samples to dataset on hub without having the dataset locally", + "html_url": "https://github.com/huggingface/datasets/issues/6702", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6702/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Base parquet batch_size on parquet row group size", + "html_url": 
"https://github.com/huggingface/datasets/pull/6701", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6701/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6701.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6701', 'merged_at': '2024-02-29T15:08:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6701.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6701'}", + "is_pull_request": true + }, + { + "title": "remove_columns is not in-place but the doc shows it is in-place", + "html_url": "https://github.com/huggingface/datasets/issues/6700", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6700/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`Dataset` unexpected changed dict data and may cause error", + "html_url": "https://github.com/huggingface/datasets/issues/6699", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6699/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Faster `xlistdir`", + "html_url": "https://github.com/huggingface/datasets/pull/6698", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6698/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6698.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6698', 'merged_at': '2024-02-27T23:38:14Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6698.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6698'}", + "is_pull_request": true + }, + { + "title": "Unable to Load Dataset in Kaggle", + "html_url": "https://github.com/huggingface/datasets/issues/6697", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6697/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Make JSON builder support an array of strings", + "html_url": "https://github.com/huggingface/datasets/pull/6696", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6696/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6696.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6696', 'merged_at': '2024-02-28T06:39:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6696.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6696'}", + "is_pull_request": true + }, + { + "title": "Support JSON file with an array of strings", + "html_url": "https://github.com/huggingface/datasets/issues/6695", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6695/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "__add__ for Dataset, IterableDataset", + "html_url": "https://github.com/huggingface/datasets/pull/6694", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6694/comments", + "labels": "[]", + 
"state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6694.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6694', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6694.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6694'}", + "is_pull_request": true + }, + { + "title": "Update the print message for chunked_dataset in process.mdx", + "html_url": "https://github.com/huggingface/datasets/pull/6693", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6693/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6693.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6693', 'merged_at': '2024-02-25T19:51:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6693.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6693'}", + "is_pull_request": true + }, + { + "title": "Enhancement: Enable loading TSV files in load_dataset()", + "html_url": "https://github.com/huggingface/datasets/pull/6692", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6692/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6692.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6692', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6692.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6692'}", + "is_pull_request": true + }, + { + "title": "load_dataset() does not support tsv", + "html_url": "https://github.com/huggingface/datasets/issues/6691", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6691/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add function to convert a script-dataset to Parquet", + "html_url": "https://github.com/huggingface/datasets/issues/6690", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6690/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": ".load_dataset() method defaults to zstandard", + "html_url": "https://github.com/huggingface/datasets/issues/6689", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6689/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Tensor type (e.g. 
from `return_tensors`) ignored in map", + "html_url": "https://github.com/huggingface/datasets/issues/6688", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6688/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fsspec: support fsspec>=2023.12.0 glob changes", + "html_url": "https://github.com/huggingface/datasets/pull/6687", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6687/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6687.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6687', 'merged_at': '2024-02-29T15:12:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6687.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6687'}", + "is_pull_request": true + }, + { + "title": "Question: Is there any way for uploading a large image dataset?", + "html_url": "https://github.com/huggingface/datasets/issues/6686", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6686/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Updated Quickstart Notebook link", + "html_url": "https://github.com/huggingface/datasets/pull/6685", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6685/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6685.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6685', 'merged_at': '2024-02-25T18:48:08Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6685.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6685'}", + "is_pull_request": true + }, + { + "title": "Improve error message for gated datasets on load", + "html_url": "https://github.com/huggingface/datasets/pull/6684", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6684/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6684.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6684', 'merged_at': '2024-02-20T15:33:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6684.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6684'}", + "is_pull_request": true + }, + { + "title": "Fix imagefolder dataset url", + "html_url": "https://github.com/huggingface/datasets/pull/6683", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6683/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6683.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6683', 'merged_at': '2024-02-19T17:18:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6683.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6683'}", + "is_pull_request": true + }, + { + "title": "Update GitHub Actions to Node 20", + "html_url": "https://github.com/huggingface/datasets/pull/6682", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6682/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6682.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6682', 'merged_at': 
'2024-02-28T06:56:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6682.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6682'}", + "is_pull_request": true + }, + { + "title": "Update release instructions", + "html_url": "https://github.com/huggingface/datasets/pull/6681", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6681/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6681.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6681', 'merged_at': '2024-02-28T07:17:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6681.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6681'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6680", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6680/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6680.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6680', 'merged_at': '2024-02-19T10:00:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6680.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6680'}", + "is_pull_request": true + }, + { + "title": "Node.js 16 GitHub Actions are deprecated", + "html_url": "https://github.com/huggingface/datasets/issues/6679", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6679/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Release: 2.17.1", + "html_url": "https://github.com/huggingface/datasets/pull/6678", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6678/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6678.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6678', 'merged_at': '2024-02-19T09:56:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6678.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6678'}", + "is_pull_request": true + }, + { + "title": "Pass through information about location of cache directory.", + "html_url": "https://github.com/huggingface/datasets/pull/6677", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6677/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6677.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6677', 'merged_at': '2024-02-28T18:51:15Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6677.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6677'}", + "is_pull_request": true + }, + { + "title": "Can't Read List of JSON Files Properly", + "html_url": 
"https://github.com/huggingface/datasets/issues/6676", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6676/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow image model (color conversion) to be specified as part of datasets Image() decode", + "html_url": "https://github.com/huggingface/datasets/issues/6675", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6675/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": " Depprcated Overview.ipynb Link to new Quickstart Notebook invalid", + "html_url": "https://github.com/huggingface/datasets/issues/6674", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6674/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "IterableDataset `set_epoch` is ignored when DataLoader `persistent_workers=True`", + "html_url": "https://github.com/huggingface/datasets/issues/6673", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6673/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}\n {'color': 'fef2c0', 'default': False, 'description': '', 'id': 3287858981, 'name': 'streaming', 'node_id': 'MDU6TGFiZWwzMjg3ODU4OTgx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/streaming'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove deprecated verbose parameter from CSV builder", + "html_url": "https://github.com/huggingface/datasets/pull/6672", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6672/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6672.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6672', 'merged_at': '2024-02-19T09:20:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6672.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6672'}", + "is_pull_request": true + }, + { + "title": "CSV builder raises deprecation warning on verbose parameter", + "html_url": "https://github.com/huggingface/datasets/issues/6671", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6671/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ValueError", + "html_url": "https://github.com/huggingface/datasets/issues/6670", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6670/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "attribute error when writing trainer.train()", + "html_url": "https://github.com/huggingface/datasets/issues/6669", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6669/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + 
{ + "title": "Chapter 6 - Issue Loading `cnn_dailymail` dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6668", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6668/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Default config for squad is incorrect", + "html_url": "https://github.com/huggingface/datasets/issues/6667", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6667/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow SplitDict setitem to replace existing SplitInfo", + "html_url": "https://github.com/huggingface/datasets/pull/6665", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6665/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6665.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6665', 'merged_at': '2024-03-01T15:56:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6665.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6665'}", + "is_pull_request": true + }, + { + "title": "Revert the changes in `arrow_writer.py` from #6636", + "html_url": "https://github.com/huggingface/datasets/pull/6664", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6664/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6664.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6664', 'merged_at': '2024-02-16T02:31:11Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6664.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6664'}", + "is_pull_request": true + }, + { + "title": "`write_examples_on_file` and `write_batch` are broken in `ArrowWriter`", + "html_url": "https://github.com/huggingface/datasets/issues/6663", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6663/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fix: show correct package name to install biopython", + "html_url": "https://github.com/huggingface/datasets/pull/6662", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6662/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6662.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6662', 'merged_at': '2024-03-01T17:43:39Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6662.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6662'}", + "is_pull_request": true + }, + { + "title": "Import error on Google Colab", + "html_url": "https://github.com/huggingface/datasets/issues/6661", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6661/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Automatic Conversion for uint16/uint32 to Compatible PyTorch Dtypes", + "html_url": "https://github.com/huggingface/datasets/pull/6660", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6660/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 
'https://github.com/huggingface/datasets/pull/6660.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6660', 'merged_at': '2024-03-01T18:52:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6660.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6660'}", + "is_pull_request": true + }, + { + "title": "Change default compression argument for JsonDatasetWriter", + "html_url": "https://github.com/huggingface/datasets/pull/6659", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6659/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6659.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6659', 'merged_at': '2024-03-01T17:44:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6659.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6659'}", + "is_pull_request": true + }, + { + "title": "[Resumable IterableDataset] Add IterableDataset state_dict", + "html_url": "https://github.com/huggingface/datasets/pull/6658", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6658/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6658.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6658', 'merged_at': '2024-06-03T19:15:39Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6658.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6658'}", + "is_pull_request": true + }, + { + "title": "Release not pushed to conda channel", + "html_url": "https://github.com/huggingface/datasets/issues/6657", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6657/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Error when loading a big local json file", + "html_url": "https://github.com/huggingface/datasets/issues/6656", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6656/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Cannot load the dataset go_emotions", + "html_url": "https://github.com/huggingface/datasets/issues/6655", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6655/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Batched dataset map throws exception that cannot cast fixed length array to Sequence", + "html_url": "https://github.com/huggingface/datasets/issues/6654", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6654/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6653", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6653/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6653.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6653', 'merged_at': '2024-02-09T10:12:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6653.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6653'}", + "is_pull_request": true + }, + { + "title": "Release: 2.17.0", + 
"html_url": "https://github.com/huggingface/datasets/pull/6652", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6652/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6652.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6652', 'merged_at': '2024-02-09T10:05:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6652.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6652'}", + "is_pull_request": true + }, + { + "title": "Slice splits support for datasets.load_from_disk", + "html_url": "https://github.com/huggingface/datasets/issues/6651", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6651/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "AttributeError: 'InMemoryTable' object has no attribute '_batches'", + "html_url": "https://github.com/huggingface/datasets/issues/6650", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6650/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Minor multi gpu doc improvement", + "html_url": "https://github.com/huggingface/datasets/pull/6649", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6649/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6649.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6649', 'merged_at': '2024-02-08T11:17:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6649.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6649'}", + "is_pull_request": true + }, + { + "title": "Document usage of hfh cli instead of git", + "html_url": "https://github.com/huggingface/datasets/pull/6648", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6648/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6648.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6648', 'merged_at': '2024-02-08T13:51:39Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6648.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6648'}", + "is_pull_request": true + }, + { + "title": "Update loading.mdx to include \"jsonl\" file loading.", + "html_url": "https://github.com/huggingface/datasets/pull/6647", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6647/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6647.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6647', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6647.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6647'}", + "is_pull_request": true + }, + { + "title": "Better multi-gpu example", + "html_url": "https://github.com/huggingface/datasets/pull/6646", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6646/comments", + "labels": "[]", + "state": 
"closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6646.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6646', 'merged_at': '2024-02-07T14:59:11Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6646.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6646'}", + "is_pull_request": true + }, + { + "title": "Support fsspec 2024.2", + "html_url": "https://github.com/huggingface/datasets/issues/6645", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6645/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support fsspec 2023.12", + "html_url": "https://github.com/huggingface/datasets/issues/6644", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6644/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Faiss GPU index cannot be serialised when passed to trainer ", + "html_url": "https://github.com/huggingface/datasets/issues/6643", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6643/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Differently dataset object saved than it is loaded.", + "html_url": "https://github.com/huggingface/datasets/issues/6642", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6642/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "unicodedecodeerror: 'utf-8' codec can't decode byte 0xac in position 25: invalid start byte", + "html_url": "https://github.com/huggingface/datasets/issues/6641", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6641/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Sign Language Support", + "html_url": "https://github.com/huggingface/datasets/issues/6640", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6640/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Run download_and_prepare if missing splits", + "html_url": "https://github.com/huggingface/datasets/pull/6639", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6639/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6639.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6639', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6639.patch', 'url': 
'https://api.github.com/repos/huggingface/datasets/pulls/6639'}", + "is_pull_request": true + }, + { + "title": "Cannot download wmt16 dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6638", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6638/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "'with_format' is extremely slow when used together with 'interleave_datasets' or 'shuffle' on IterableDatasets", + "html_url": "https://github.com/huggingface/datasets/issues/6637", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6637/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Faster column validation and reordering", + "html_url": "https://github.com/huggingface/datasets/pull/6636", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6636/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6636.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6636', 'merged_at': '2024-02-06T23:03:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6636.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6636'}", + "is_pull_request": true + }, + { + "title": "Fix missing info when loading some datasets from Parquet export", + "html_url": "https://github.com/huggingface/datasets/pull/6635", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6635/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6635.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6635', 'merged_at': '2024-02-07T16:41:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6635.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6635'}", + "is_pull_request": true + }, + { + "title": "Support data_dir parameter in push_to_hub", + "html_url": "https://github.com/huggingface/datasets/pull/6634", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6634/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6634.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6634', 'merged_at': '2024-02-05T10:26:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6634.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6634'}", + "is_pull_request": true + }, + { + "title": "dataset viewer requires no-script", + "html_url": "https://github.com/huggingface/datasets/pull/6633", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6633/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6633.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6633', 'merged_at': '2024-01-31T13:59:01Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6633.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6633'}", + "is_pull_request": true + }, + { + "title": "Fix reload cache with data dir", + "html_url": "https://github.com/huggingface/datasets/pull/6632", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6632/comments", + "labels": "[]", + "state": "closed", + 
"pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6632.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6632', 'merged_at': '2024-02-06T17:21:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6632.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6632'}", + "is_pull_request": true + }, + { + "title": "Fix filelock: use current umask for filelock >= 3.10", + "html_url": "https://github.com/huggingface/datasets/pull/6631", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6631/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6631.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6631', 'merged_at': '2024-01-30T15:28:37Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6631.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6631'}", + "is_pull_request": true + }, + { + "title": "Bump max range of dill to 0.3.8", + "html_url": "https://github.com/huggingface/datasets/pull/6630", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6630/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6630.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6630', 'merged_at': '2024-01-30T15:12:25Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6630.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6630'}", + "is_pull_request": true + }, + { + "title": "Support push_to_hub without org/user to default to logged-in user", + "html_url": "https://github.com/huggingface/datasets/pull/6629", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6629/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6629.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6629', 'merged_at': '2024-02-05T12:29:36Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6629.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6629'}", + "is_pull_request": true + }, + { + "title": "Make CLI test support multi-processing", + "html_url": "https://github.com/huggingface/datasets/pull/6628", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6628/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6628.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6628', 'merged_at': '2024-02-05T10:23:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6628.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6628'}", + "is_pull_request": true + }, + { + "title": "Disable `tqdm` bars in non-interactive environments", + "html_url": "https://github.com/huggingface/datasets/pull/6627", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6627/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6627.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6627', 'merged_at': '2024-01-29T15:41:32Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6627.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6627'}", + "is_pull_request": true + }, + { + "title": "Raise 
error on bad split name", + "html_url": "https://github.com/huggingface/datasets/pull/6626", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6626/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6626.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6626', 'merged_at': '2024-01-29T15:12:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6626.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6626'}", + "is_pull_request": true + }, + { + "title": "How to download the laion-coco dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6624", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6624/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "streaming datasets doesn't work properly with multi-node", + "html_url": "https://github.com/huggingface/datasets/issues/6623", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6623/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "multi-GPU map does not work", + "html_url": "https://github.com/huggingface/datasets/issues/6622", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6622/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "deleted", + "html_url": "https://github.com/huggingface/datasets/issues/6621", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6621/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "wiki_dpr.py error (ID mismatch between lines {id} and vector {vec_id}", + "html_url": "https://github.com/huggingface/datasets/issues/6620", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6620/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Migrate from `setup.cfg` to `pyproject.toml`", + "html_url": "https://github.com/huggingface/datasets/pull/6619", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6619/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6619.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6619', 'merged_at': '2024-01-26T15:47:32Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6619.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6619'}", + "is_pull_request": true + }, + { + "title": "While importing load_dataset from datasets ", + "html_url": "https://github.com/huggingface/datasets/issues/6618", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6618/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix CI: pyarrow 15, pandas 2.2 and sqlachemy", + "html_url": "https://github.com/huggingface/datasets/pull/6617", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6617/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6617.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6617', 'merged_at': '2024-01-26T14:50:44Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6617.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6617'}", + "is_pull_request": true + }, + { + "title": "Use schema metadata only if it matches features", + "html_url": "https://github.com/huggingface/datasets/pull/6616", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6616/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6616.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6616', 'merged_at': '2024-01-26T16:19:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6616.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6616'}", + "is_pull_request": true + }, + { + "title": "...", + "html_url": "https://github.com/huggingface/datasets/issues/6615", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6615/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`datasets/downloads` cleanup tool", + "html_url": "https://github.com/huggingface/datasets/issues/6614", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6614/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "cnn_dailymail repeats itself", + "html_url": "https://github.com/huggingface/datasets/issues/6612", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6612/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`load_from_disk` with large dataset from S3 runs into `botocore.exceptions.ClientError`", + "html_url": "https://github.com/huggingface/datasets/issues/6611", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6611/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "cast_column to Sequence(subfeatures_dict) has err", + "html_url": "https://github.com/huggingface/datasets/issues/6610", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6610/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Wrong path for cache directory in offline mode", + "html_url": "https://github.com/huggingface/datasets/issues/6609", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6609/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add `with_rank` param to `Dataset.filter`", + "html_url": "https://github.com/huggingface/datasets/pull/6608", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6608/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 
'https://github.com/huggingface/datasets/pull/6608.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6608', 'merged_at': '2024-01-29T16:36:53Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6608.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6608'}", + "is_pull_request": true + }, + { + "title": "Update features.py to avoid bfloat16 unsupported error", + "html_url": "https://github.com/huggingface/datasets/pull/6607", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6607/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6607.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6607', 'merged_at': '2024-05-17T09:40:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6607.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6607'}", + "is_pull_request": true + }, + { + "title": "Dedicated RNG object for fingerprinting", + "html_url": "https://github.com/huggingface/datasets/pull/6606", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6606/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6606.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6606', 'merged_at': '2024-01-26T15:05:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6606.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6606'}", + "is_pull_request": true + }, + { + "title": "ELI5 no longer available, but referenced in example code", + "html_url": "https://github.com/huggingface/datasets/issues/6605", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6605/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Transform fingerprint collisions due to setting fixed random seed", + "html_url": "https://github.com/huggingface/datasets/issues/6604", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6604/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "datasets map `cache_file_name` does not work", + "html_url": "https://github.com/huggingface/datasets/issues/6603", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6603/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Index error when data is large", + "html_url": "https://github.com/huggingface/datasets/issues/6602", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6602/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add safety checks when using only part of dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6601", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6601/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6601.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6601', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6601.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6601'}", + "is_pull_request": true + }, + { + "title": "Loading CSV exported dataset 
has unexpected format", + "html_url": "https://github.com/huggingface/datasets/issues/6600", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6600/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Easy way to segment into 30s snippets given an m4a file and a vtt file ", + "html_url": "https://github.com/huggingface/datasets/issues/6599", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6599/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unexpected keyword argument 'hf' when downloading CSV dataset from S3", + "html_url": "https://github.com/huggingface/datasets/issues/6598", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6598/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Dataset.push_to_hub of a canonical dataset creates an additional dataset under the user namespace", + "html_url": "https://github.com/huggingface/datasets/issues/6597", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6597/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Drop redundant None guard.", + "html_url": "https://github.com/huggingface/datasets/pull/6596", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6596/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6596.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6596', 'merged_at': '2024-01-16T17:05:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6596.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6596'}", + "is_pull_request": true + }, + { + "title": "Loading big dataset raises pyarrow.lib.ArrowNotImplementedError 2", + "html_url": "https://github.com/huggingface/datasets/issues/6595", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6595/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "IterableDataset sharding logic needs improvement", + "html_url": "https://github.com/huggingface/datasets/issues/6594", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6594/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Logs are delayed when doing .map when `docker logs`", + "html_url": "https://github.com/huggingface/datasets/issues/6592", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6592/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "The datasets models housed in Dropbox can't support a lot of users downloading them", + "html_url": 
"https://github.com/huggingface/datasets/issues/6591", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6591/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Feature request: Multi-GPU dataset mapping for SDXL training", + "html_url": "https://github.com/huggingface/datasets/issues/6590", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6590/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "After `2.16.0` version, there are `PermissionError` when users use shared cache_dir", + "html_url": "https://github.com/huggingface/datasets/issues/6589", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6589/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fix os.listdir return name is empty string", + "html_url": "https://github.com/huggingface/datasets/issues/6588", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6588/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow concatenation of datasets with mixed structs", + "html_url": "https://github.com/huggingface/datasets/pull/6587", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6587/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6587.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6587', 'merged_at': '2024-02-08T14:38:32Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6587.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6587'}", + "is_pull_request": true + }, + { + "title": "keep more info in DatasetInfo.from_merge #6585", + "html_url": "https://github.com/huggingface/datasets/pull/6586", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6586/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6586.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6586', 'merged_at': '2024-01-26T15:53:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6586.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6586'}", + "is_pull_request": true + }, + { + "title": "losing DatasetInfo in Dataset.map when num_proc > 1", + "html_url": "https://github.com/huggingface/datasets/issues/6585", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6585/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "np.fromfile not supported", + "html_url": "https://github.com/huggingface/datasets/issues/6584", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6584/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "remove eli5 test", + "html_url": "https://github.com/huggingface/datasets/pull/6583", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6583/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6583.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6583', 'merged_at': '2024-01-11T16:09:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6583.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6583'}", + "is_pull_request": true + }, + { + "title": "Fix for Incorrect ex_iterable used with multi num_worker", + "html_url": "https://github.com/huggingface/datasets/pull/6582", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6582/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6582.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6582', 'merged_at': '2024-03-01T19:02:33Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6582.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6582'}", + "is_pull_request": true + }, + { + "title": "fix os.listdir return name is empty string", + "html_url": "https://github.com/huggingface/datasets/pull/6581", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6581/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6581.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6581', 'merged_at': '2024-01-24T10:08:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6581.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6581'}", + "is_pull_request": true + }, + { + "title": "dataset cache only stores one config of the dataset in parquet dir, and uses that for all other configs resulting in showing same data in all configs.", + "html_url": "https://github.com/huggingface/datasets/issues/6580", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6580/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unable to load `eli5` dataset with streaming", + "html_url": "https://github.com/huggingface/datasets/issues/6579", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6579/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Faster webdataset streaming", + "html_url": "https://github.com/huggingface/datasets/pull/6578", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6578/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6578.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6578', 'merged_at': '2024-01-30T18:39:51Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6578.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6578'}", + "is_pull_request": true + }, + { + "title": "502 Server Errors when streaming large dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6577", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6577/comments", + "labels": "[{'color': 'fef2c0', 'default': False, 'description': '', 'id': 3287858981, 'name': 'streaming', 'node_id': 'MDU6TGFiZWwzMjg3ODU4OTgx', 'url': 
'https://api.github.com/repos/huggingface/datasets/labels/streaming'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "document page 404 not found after redirection", + "html_url": "https://github.com/huggingface/datasets/issues/6576", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6576/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[IterableDataset] Fix `drop_last_batch`in map after shuffling or sharding", + "html_url": "https://github.com/huggingface/datasets/pull/6575", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6575/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6575.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6575', 'merged_at': '2024-01-11T16:10:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6575.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6575'}", + "is_pull_request": true + }, + { + "title": "Fix tests based on datasets that used to have scripts", + "html_url": "https://github.com/huggingface/datasets/pull/6574", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6574/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6574.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6574', 'merged_at': '2024-01-09T16:05:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6574.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6574'}", + "is_pull_request": true + }, + { + "title": "[WebDataset] Audio support and bug fixes", + "html_url": "https://github.com/huggingface/datasets/pull/6573", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6573/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6573.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6573', 'merged_at': '2024-01-11T16:11:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6573.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6573'}", + "is_pull_request": true + }, + { + "title": "Adding option for multipart achive download", + "html_url": "https://github.com/huggingface/datasets/pull/6572", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6572/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6572.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6572', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6572.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6572'}", + "is_pull_request": true + }, + { + "title": "Make DatasetDict.column_names return a list instead of dict", + "html_url": "https://github.com/huggingface/datasets/issues/6571", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6571/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "No online docs for 2.16 release", + "html_url": "https://github.com/huggingface/datasets/issues/6570", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6570/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}\n {'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "WebDataset ignores features defined in YAML or passed to load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6569", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6569/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "keep_in_memory=True does not seem to work", + "html_url": "https://github.com/huggingface/datasets/issues/6568", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6568/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "AttributeError: 'str' object has no attribute 'to'", + "html_url": "https://github.com/huggingface/datasets/issues/6567", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6567/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "I train controlnet_sdxl in bf16 datatype, got unsupported ERROR in datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6566", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6566/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": " `drop_last_batch=True` for IterableDataset map function is ignored with multiprocessing DataLoader ", + "html_url": "https://github.com/huggingface/datasets/issues/6565", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6565/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`Dataset.filter` missing `with_rank` parameter", + "html_url": "https://github.com/huggingface/datasets/issues/6564", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6564/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`ImportError`: cannot import name 'insecure_hashlib' from 'huggingface_hub.utils' (.../huggingface_hub/utils/__init__.py)", + "html_url": "https://github.com/huggingface/datasets/issues/6563", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6563/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "datasets.DownloadMode.FORCE_REDOWNLOAD use cache to download 
dataset features with load_dataset function", + "html_url": "https://github.com/huggingface/datasets/issues/6562", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6562/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Document YAML configuration with \"data_dir\"", + "html_url": "https://github.com/huggingface/datasets/issues/6561", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6561/comments", + "labels": "[{'color': '0075ca', 'default': True, 'description': 'Improvements or additions to documentation', 'id': 1935892861, 'name': 'documentation', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODYx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/documentation'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support Video ", + "html_url": "https://github.com/huggingface/datasets/issues/6560", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6560/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Latest version 2.16.1, when load dataset error occurs. ValueError: BuilderConfig 'allenai--c4' not found. Available: ['default']", + "html_url": "https://github.com/huggingface/datasets/issues/6559", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6559/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "OSError: image file is truncated (1 bytes not processed) #28323", + "html_url": "https://github.com/huggingface/datasets/issues/6558", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6558/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support standalone yaml", + "html_url": "https://github.com/huggingface/datasets/pull/6557", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6557/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6557.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6557', 'merged_at': '2024-01-11T17:53:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6557.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6557'}", + "is_pull_request": true + }, + { + "title": "Fix imagefolder with one image", + "html_url": "https://github.com/huggingface/datasets/pull/6556", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6556/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6556.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6556', 'merged_at': '2024-01-09T13:06:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6556.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6556'}", + "is_pull_request": true + }, + { + "title": "Do not use Parquet exports if revision is passed", + "html_url": "https://github.com/huggingface/datasets/pull/6555", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6555/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6555.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6555', 'merged_at': '2024-02-02T10:35:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6555.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6555'}", + "is_pull_request": true + }, + { + "title": "Parquet exports are used even if revision is passed", + "html_url": "https://github.com/huggingface/datasets/issues/6554", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6554/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Cannot import name 'load_dataset' from .... module \u2018datasets\u2019", + "html_url": "https://github.com/huggingface/datasets/issues/6553", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6553/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Loading a dataset from Google Colab hangs at \"Resolving data files\".", + "html_url": "https://github.com/huggingface/datasets/issues/6552", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6552/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix parallel downloads for datasets without scripts", + "html_url": "https://github.com/huggingface/datasets/pull/6551", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6551/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6551.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6551', 'merged_at': '2024-01-03T13:19:47Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6551.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6551'}", + "is_pull_request": true + }, + { + "title": "Multi gpu docs", + "html_url": "https://github.com/huggingface/datasets/pull/6550", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6550/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6550.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6550', 'merged_at': '2024-01-31T13:38:59Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6550.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6550'}", + "is_pull_request": true + }, + { + "title": "Loading from hf hub with clearer error message", + "html_url": "https://github.com/huggingface/datasets/issues/6549", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6549/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Skip if a dataset has 
issues", + "html_url": "https://github.com/huggingface/datasets/issues/6548", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6548/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6547", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6547/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6547.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6547', 'merged_at': '2023-12-30T16:47:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6547.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6547'}", + "is_pull_request": true + }, + { + "title": "Release: 2.16.1", + "html_url": "https://github.com/huggingface/datasets/pull/6546", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6546/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6546.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6546', 'merged_at': '2023-12-30T16:45:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6546.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6546'}", + "is_pull_request": true + }, + { + "title": "`image` column not automatically inferred if image dataset only contains 1 image", + "html_url": "https://github.com/huggingface/datasets/issues/6545", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6545/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix custom configs from script", + "html_url": "https://github.com/huggingface/datasets/pull/6544", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6544/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6544.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6544', 'merged_at': '2023-12-30T16:09:49Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6544.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6544'}", + "is_pull_request": true + }, + { + "title": "Fix dl_manager.extract returning FileNotFoundError", + "html_url": "https://github.com/huggingface/datasets/pull/6543", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6543/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6543.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6543', 'merged_at': '2023-12-30T15:53:59Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6543.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6543'}", + "is_pull_request": true + }, + { + "title": "Datasets : wikipedia 20220301.en error ", + "html_url": "https://github.com/huggingface/datasets/issues/6542", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6542/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Dataset not loading successfully.", + "html_url": "https://github.com/huggingface/datasets/issues/6541", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6541/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Extreme inefficiency for `save_to_disk` when merging datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6540", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6540/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "'Repo card metadata block was not found' when loading a pragmeval dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6539", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6539/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ImportError: cannot import name 'SchemaInferenceError' from 'datasets.arrow_writer' (/opt/conda/lib/python3.10/site-packages/datasets/arrow_writer.py)", + "html_url": "https://github.com/huggingface/datasets/issues/6538", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6538/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Adding support for netCDF (*.nc) files", + "html_url": "https://github.com/huggingface/datasets/issues/6537", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6537/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "datasets.load_dataset raises FileNotFoundError for datasets==2.16.0", + "html_url": "https://github.com/huggingface/datasets/issues/6536", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6536/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "IndexError: Invalid key: 47682 is out of bounds for size 0 while using PEFT", + "html_url": "https://github.com/huggingface/datasets/issues/6535", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6535/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "How to configure multiple folders in the same zip package", + "html_url": "https://github.com/huggingface/datasets/issues/6534", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6534/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ted_talks_iwslt | Error: Config name is missing", + "html_url": "https://github.com/huggingface/datasets/issues/6533", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6533/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[Feature request] Indexing datasets by a customly-defined id field to enable random access dataset items via the id", + "html_url": "https://github.com/huggingface/datasets/issues/6532", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6532/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or 
request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add polars compatibility", + "html_url": "https://github.com/huggingface/datasets/pull/6531", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6531/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6531.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6531', 'merged_at': '2024-03-08T15:22:58Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6531.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6531'}", + "is_pull_request": true + }, + { + "title": "Impossible to save a mapped dataset to disk", + "html_url": "https://github.com/huggingface/datasets/issues/6530", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6530/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Impossible to only download a test split", + "html_url": "https://github.com/huggingface/datasets/issues/6529", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6529/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6528", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6528/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6528.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6528', 'merged_at': '2023-12-22T14:25:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6528.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6528'}", + "is_pull_request": true + }, + { + "title": "Release: 2.16.0", + "html_url": "https://github.com/huggingface/datasets/pull/6527", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6527/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6527.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6527', 'merged_at': '2023-12-22T14:17:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6527.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6527'}", + "is_pull_request": true + }, + { + "title": "Preserve order of configs and splits when using Parquet exports", + "html_url": "https://github.com/huggingface/datasets/pull/6526", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6526/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6526.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6526', 'merged_at': '2023-12-22T11:36:14Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6526.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6526'}", + "is_pull_request": true + }, + { + "title": "BBox type", + "html_url": "https://github.com/huggingface/datasets/pull/6525", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6525/comments", + "labels": "[]", + "state": 
"closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6525.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6525', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6525.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6525'}", + "is_pull_request": true + }, + { + "title": "Streaming the Pile: Missing Files", + "html_url": "https://github.com/huggingface/datasets/issues/6524", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6524/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "fix tests", + "html_url": "https://github.com/huggingface/datasets/pull/6523", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6523/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6523.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6523', 'merged_at': '2023-12-21T15:50:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6523.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6523'}", + "is_pull_request": true + }, + { + "title": "Loading HF Hub Dataset (private org repo) fails to load all features", + "html_url": "https://github.com/huggingface/datasets/issues/6522", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6522/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "The order of the splits is not preserved", + "html_url": "https://github.com/huggingface/datasets/issues/6521", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6521/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support commit_description parameter in push_to_hub", + "html_url": "https://github.com/huggingface/datasets/pull/6520", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6520/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6520.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6520', 'merged_at': '2023-12-21T14:43:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6520.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6520'}", + "is_pull_request": true + }, + { + "title": "Support push_to_hub canonical datasets", + "html_url": "https://github.com/huggingface/datasets/pull/6519", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6519/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6519.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6519', 'merged_at': '2023-12-21T14:40:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6519.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6519'}", + "is_pull_request": true + }, + { + "title": "fix get_metadata_patterns function args error", + "html_url": "https://github.com/huggingface/datasets/pull/6518", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6518/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6518.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6518', 'merged_at': '2023-12-21T15:07:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6518.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6518'}", + "is_pull_request": true + }, + { + "title": "Bug get_metadata_patterns arg error", + "html_url": "https://github.com/huggingface/datasets/issues/6517", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6517/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support huggingface-hub pre-releases", + "html_url": "https://github.com/huggingface/datasets/pull/6516", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6516/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6516.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6516', 'merged_at': '2023-12-20T08:44:44Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6516.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6516'}", + "is_pull_request": true + }, + { + "title": "Why call http_head() when fsspec_head() succeeds", + "html_url": "https://github.com/huggingface/datasets/issues/6515", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6515/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Cache backward compatibility with 2.15.0", + "html_url": "https://github.com/huggingface/datasets/pull/6514", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6514/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6514.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6514', 'merged_at': '2023-12-21T21:07:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6514.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6514'}", + "is_pull_request": true + }, + { + "title": "Support huggingface-hub 0.20.0", + "html_url": "https://github.com/huggingface/datasets/issues/6513", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6513/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove deprecated HfFolder", + "html_url": "https://github.com/huggingface/datasets/pull/6512", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6512/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6512.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6512', 'merged_at': '2023-12-19T20:14:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6512.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6512'}", + "is_pull_request": true + }, + { + "title": "Implement get dataset default config name", + "html_url": "https://github.com/huggingface/datasets/pull/6511", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6511/comments", + "labels": "[]", + "state": "closed", + "pull_request": 
"{'diff_url': 'https://github.com/huggingface/datasets/pull/6511.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6511', 'merged_at': '2023-12-21T14:42:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6511.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6511'}", + "is_pull_request": true + }, + { + "title": "Replace `list_files_info` with `list_repo_tree` in `push_to_hub`", + "html_url": "https://github.com/huggingface/datasets/pull/6510", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6510/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6510.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6510', 'merged_at': '2023-12-19T17:58:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6510.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6510'}", + "is_pull_request": true + }, + { + "title": "Better cast error when generating dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6509", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6509/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6509.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6509', 'merged_at': '2023-12-19T09:31:03Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6509.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6509'}", + "is_pull_request": true + }, + { + "title": "Read GeoParquet files using parquet reader", + "html_url": "https://github.com/huggingface/datasets/pull/6508", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6508/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6508.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6508', 'merged_at': '2024-01-26T16:18:41Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6508.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6508'}", + "is_pull_request": true + }, + { + "title": "where is glue_metric.py> @Frankie123421 what was the resolution to this?", + "html_url": "https://github.com/huggingface/datasets/issues/6507", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6507/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Incorrect test set labels for RTE and CoLA datasets via load_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6506", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6506/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Got stuck when I trying to load a dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6505", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6505/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Error Pushing to Hub", + "html_url": "https://github.com/huggingface/datasets/issues/6504", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6504/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, 
+ { + "title": "Fix streaming xnli", + "html_url": "https://github.com/huggingface/datasets/pull/6503", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6503/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6503.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6503', 'merged_at': '2023-12-15T14:44:46Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6503.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6503'}", + "is_pull_request": true + }, + { + "title": "Pickle support for `torch.Generator` objects", + "html_url": "https://github.com/huggingface/datasets/pull/6502", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6502/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6502.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6502', 'merged_at': '2023-12-15T14:58:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6502.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6502'}", + "is_pull_request": true + }, + { + "title": " OverflowError: value too large to convert to int32_t ", + "html_url": "https://github.com/huggingface/datasets/issues/6501", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6501/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Enable setting config as default when push_to_hub", + "html_url": "https://github.com/huggingface/datasets/pull/6500", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6500/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6500.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6500', 'merged_at': '2023-12-18T11:50:03Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6500.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6500'}", + "is_pull_request": true + }, + { + "title": "docs: add reference Git over SSH", + "html_url": "https://github.com/huggingface/datasets/pull/6499", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6499/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6499.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6499', 'merged_at': '2023-12-15T11:42:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6499.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6499'}", + "is_pull_request": true + }, + { + "title": "Fallback on dataset script if user wants to load default config", + "html_url": "https://github.com/huggingface/datasets/pull/6498", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6498/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6498.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6498', 'merged_at': '2023-12-15T13:10:48Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6498.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6498'}", + "is_pull_request": true + }, + { + "title": "Support setting a default config name in push_to_hub", + "html_url": 
"https://github.com/huggingface/datasets/issues/6497", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6497/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Error when writing a dataset to HF Hub: A commit has happened since. Please refresh and try again.", + "html_url": "https://github.com/huggingface/datasets/issues/6496", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6496/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Image Data loaded Twice", + "html_url": "https://github.com/huggingface/datasets/issues/6494", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6494/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Newline characters don't behave as expected when calling dataset.info", + "html_url": "https://github.com/huggingface/datasets/issues/6495", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6495/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Lazy data files resolution and offline cache reload", + "html_url": "https://github.com/huggingface/datasets/pull/6493", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6493/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6493.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6493', 'merged_at': '2023-12-21T15:13:11Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6493.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6493'}", + "is_pull_request": true + }, + { + "title": "Make push_to_hub return CommitInfo", + "html_url": "https://github.com/huggingface/datasets/pull/6492", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6492/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6492.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6492', 'merged_at': '2023-12-13T14:22:41Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6492.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6492'}", + "is_pull_request": true + }, + { + "title": "Fix metrics dead link", + "html_url": "https://github.com/huggingface/datasets/pull/6491", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6491/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6491.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6491', 'merged_at': '2023-12-21T15:08:53Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6491.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6491'}", + "is_pull_request": true + }, + { + "title": "`load_dataset(...,save_infos=True)` not working without loading script", + "html_url": "https://github.com/huggingface/datasets/issues/6490", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6490/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset imageflder for aws s3 path ", + "html_url": "https://github.com/huggingface/datasets/issues/6489", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6489/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "429 Client Error", + "html_url": "https://github.com/huggingface/datasets/issues/6488", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6488/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Update builder hash with info", + "html_url": "https://github.com/huggingface/datasets/pull/6487", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6487/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6487.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6487', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6487.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6487'}", + "is_pull_request": true + }, + { + "title": "Fix docs phrasing about supported formats when sharing a dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6486", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6486/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6486.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6486', 'merged_at': '2023-12-13T14:15:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6486.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6486'}", + "is_pull_request": true + }, + { + "title": "FileNotFoundError: [Errno 2] No such file or directory: 'nul'", + "html_url": "https://github.com/huggingface/datasets/issues/6485", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6485/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Iterable Dataset: rename column clashes with remove column", + "html_url": "https://github.com/huggingface/datasets/issues/6483", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6483/comments", + "labels": "[{'color': 'fef2c0', 'default': False, 'description': '', 'id': 3287858981, 'name': 'streaming', 'node_id': 'MDU6TGFiZWwzMjg3ODU4OTgx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/streaming'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[Feature Request] Dataset versioning", + "html_url": "https://github.com/huggingface/datasets/issues/6484", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6484/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix max lock length on unix", + "html_url": 
"https://github.com/huggingface/datasets/pull/6482", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6482/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6482.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6482', 'merged_at': '2023-12-12T11:47:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6482.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6482'}", + "is_pull_request": true + }, + { + "title": "using torchrun, save_to_disk suddenly shows SIGTERM", + "html_url": "https://github.com/huggingface/datasets/issues/6481", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6481/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add IterableDataset `__repr__`", + "html_url": "https://github.com/huggingface/datasets/pull/6480", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6480/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6480.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6480', 'merged_at': '2023-12-08T13:26:54Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6480.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6480'}", + "is_pull_request": true + }, + { + "title": "More robust preupload retry mechanism", + "html_url": "https://github.com/huggingface/datasets/pull/6479", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6479/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6479.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6479', 'merged_at': '2023-12-06T19:41:06Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6479.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6479'}", + "is_pull_request": true + }, + { + "title": "How to load data from lakefs", + "html_url": "https://github.com/huggingface/datasets/issues/6478", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6478/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix PermissionError on Windows CI", + "html_url": "https://github.com/huggingface/datasets/pull/6477", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6477/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6477.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6477', 'merged_at': '2023-12-06T09:17:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6477.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6477'}", + "is_pull_request": true + }, + { + "title": "CI on windows is broken: PermissionError", + "html_url": "https://github.com/huggingface/datasets/issues/6476", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6476/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "laion2B-en failed to load on Windows with PrefetchVirtualMemory failed", + "html_url": "https://github.com/huggingface/datasets/issues/6475", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6475/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Deprecate Beam API and download from HF GCS bucket", + "html_url": "https://github.com/huggingface/datasets/pull/6474", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6474/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6474.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6474', 'merged_at': '2024-03-12T14:50:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6474.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6474'}", + "is_pull_request": true + }, + { + "title": "Fix CI quality", + "html_url": "https://github.com/huggingface/datasets/pull/6473", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6473/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6473.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6473', 'merged_at': '2023-12-05T18:08:41Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6473.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6473'}", + "is_pull_request": true + }, + { + "title": "CI quality is broken", + "html_url": "https://github.com/huggingface/datasets/issues/6472", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6472/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}\n {'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove delete doc CI", + "html_url": "https://github.com/huggingface/datasets/pull/6471", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6471/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6471.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6471', 'merged_at': '2023-12-05T12:38:50Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6471.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6471'}", + "is_pull_request": true + }, + { + "title": "If an image in a dataset is corrupted, we get unescapable error", + "html_url": "https://github.com/huggingface/datasets/issues/6470", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6470/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Don't expand_info in HF glob", + "html_url": "https://github.com/huggingface/datasets/pull/6469", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6469/comments", + "labels": "[]", + "state": "closed", + 
"pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6469.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6469', 'merged_at': '2023-12-15T13:12:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6469.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6469'}", + "is_pull_request": true + }, + { + "title": "Use auth to get parquet export", + "html_url": "https://github.com/huggingface/datasets/pull/6468", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6468/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6468.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6468', 'merged_at': '2023-12-04T17:15:11Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6468.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6468'}", + "is_pull_request": true + }, + { + "title": "New version release request", + "html_url": "https://github.com/huggingface/datasets/issues/6467", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6467/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Can't align optional features of struct", + "html_url": "https://github.com/huggingface/datasets/issues/6466", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6466/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`load_dataset` uses out-of-date cache instead of re-downloading a changed dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6465", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6465/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add concurrent loading of shards to datasets.load_from_disk ", + "html_url": "https://github.com/huggingface/datasets/pull/6464", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6464/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6464.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6464', 'merged_at': '2024-01-26T15:10:26Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6464.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6464'}", + "is_pull_request": true + }, + { + "title": "Disable benchmarks in PRs", + "html_url": "https://github.com/huggingface/datasets/pull/6463", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6463/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6463.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6463', 'merged_at': '2023-12-01T12:03:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6463.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6463'}", + "is_pull_request": true + }, + { + "title": "Missing DatasetNotFoundError", + "html_url": 
"https://github.com/huggingface/datasets/pull/6462", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6462/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6462.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6462', 'merged_at': '2023-11-30T18:30:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6462.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6462'}", + "is_pull_request": true + }, + { + "title": "Fix shard retry mechanism in `push_to_hub`", + "html_url": "https://github.com/huggingface/datasets/pull/6461", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6461/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6461.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6461', 'merged_at': '2023-12-01T17:51:33Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6461.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6461'}", + "is_pull_request": true + }, + { + "title": "jsonlines files don't load with `load_dataset`", + "html_url": "https://github.com/huggingface/datasets/issues/6460", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6460/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Retrieve cached datasets that were pushed to hub when offline", + "html_url": "https://github.com/huggingface/datasets/pull/6459", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6459/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6459.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6459', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6459.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6459'}", + "is_pull_request": true + }, + { + "title": "Lazy data files resolution", + "html_url": "https://github.com/huggingface/datasets/pull/6458", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6458/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6458.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6458', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6458.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6458'}", + "is_pull_request": true + }, + { + "title": "`TypeError`: huggingface_hub.hf_file_system.HfFileSystem.find() got multiple values for keyword argument 'maxdepth'", + "html_url": "https://github.com/huggingface/datasets/issues/6457", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6457/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Don't require trust_remote_code in inspect_dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6456", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6456/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6456.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6456', 'merged_at': 
'2023-11-30T10:34:12Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6456.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6456'}", + "is_pull_request": true + }, + { + "title": "Refactor `dill` logic", + "html_url": "https://github.com/huggingface/datasets/pull/6454", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6454/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6454.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6454', 'merged_at': '2023-11-28T16:29:31Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6454.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6454'}", + "is_pull_request": true + }, + { + "title": "Update hub-docs reference", + "html_url": "https://github.com/huggingface/datasets/pull/6453", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6453/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6453.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6453', 'merged_at': '2023-11-27T10:17:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6453.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6453'}", + "is_pull_request": true + }, + { + "title": "Praveen_repo_pull_req", + "html_url": "https://github.com/huggingface/datasets/pull/6452", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6452/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6452.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6452', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6452.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6452'}", + "is_pull_request": true + }, + { + "title": "Unable to read \"marsyas/gtzan\" data", + "html_url": "https://github.com/huggingface/datasets/issues/6451", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6451/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support multiple image/audio columns in ImageFolder/AudioFolder", + "html_url": "https://github.com/huggingface/datasets/issues/6450", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6450/comments", + "labels": "[{'color': 'cfd3d7', 'default': True, 'description': 'This issue or pull request already exists', 'id': 1935892865, 'name': 'duplicate', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODY1', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/duplicate'}\n {'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix metadata file resolution when inferred pattern is `**`", + "html_url": "https://github.com/huggingface/datasets/pull/6449", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6449/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6449.diff', 'html_url': 
'https://github.com/huggingface/datasets/pull/6449', 'merged_at': '2023-11-24T17:13:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6449.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6449'}", + "is_pull_request": true + }, + { + "title": "Use parquet export if possible", + "html_url": "https://github.com/huggingface/datasets/pull/6448", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6448/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6448.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6448', 'merged_at': '2023-12-01T17:50:59Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6448.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6448'}", + "is_pull_request": true + }, + { + "title": "Support one dataset loader per config when using YAML", + "html_url": "https://github.com/huggingface/datasets/issues/6447", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6447/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Speech Commands v2 dataset doesn't match AST-v2 config", + "html_url": "https://github.com/huggingface/datasets/issues/6446", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6446/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Use `filelock` package for file locking", + "html_url": "https://github.com/huggingface/datasets/pull/6445", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6445/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6445.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6445', 'merged_at': '2023-11-23T18:41:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6445.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6445'}", + "is_pull_request": true + }, + { + "title": "Remove `Table.__getstate__` and `Table.__setstate__`", + "html_url": "https://github.com/huggingface/datasets/pull/6444", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6444/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6444.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6444', 'merged_at': '2023-11-23T15:13:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6444.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6444'}", + "is_pull_request": true + }, + { + "title": "Trouble loading files defined in YAML explicitly", + "html_url": "https://github.com/huggingface/datasets/issues/6443", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6443/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "open", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "Trouble loading image folder with additional features - metadata file ignored", + "html_url": "https://github.com/huggingface/datasets/issues/6442", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6442/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Trouble Loading a Gated Dataset For User with Granted Permission", + "html_url": "https://github.com/huggingface/datasets/issues/6441", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6441/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`.map` not hashing under python 3.9", + "html_url": "https://github.com/huggingface/datasets/issues/6440", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6440/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Download + preparation speed of datasets.load_dataset is 20x slower than huggingface hub snapshot and manual loding", + "html_url": "https://github.com/huggingface/datasets/issues/6439", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6439/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support GeoParquet", + "html_url": "https://github.com/huggingface/datasets/issues/6438", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6438/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Problem in training iterable dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6437", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6437/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "TypeError: () takes 0 positional arguments but 1 was given", + "html_url": "https://github.com/huggingface/datasets/issues/6436", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6436/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Cannot re-initialize CUDA in forked subprocess. 
To use CUDA with multiprocessing, you must use the 'spawn' start method", + "html_url": "https://github.com/huggingface/datasets/issues/6435", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6435/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Use `ruff` for formatting", + "html_url": "https://github.com/huggingface/datasets/pull/6434", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6434/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6434.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6434', 'merged_at': '2023-11-21T14:13:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6434.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6434'}", + "is_pull_request": true + }, + { + "title": "Better `tqdm` wrapper", + "html_url": "https://github.com/huggingface/datasets/pull/6433", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6433/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6433.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6433', 'merged_at': '2023-11-22T16:42:08Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6433.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6433'}", + "is_pull_request": true + }, + { + "title": "load_dataset does not load all of the data in my input file", + "html_url": "https://github.com/huggingface/datasets/issues/6432", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6432/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Create DatasetNotFoundError and DataFilesNotFoundError", + "html_url": "https://github.com/huggingface/datasets/pull/6431", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6431/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6431.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6431', 'merged_at': '2023-11-22T15:12:33Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6431.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6431'}", + "is_pull_request": true + }, + { + "title": "Add trust_remote_code argument", + "html_url": "https://github.com/huggingface/datasets/pull/6429", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6429/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6429.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6429', 'merged_at': '2023-11-28T16:03:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6429.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6429'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6428", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6428/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6428.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6428', 'merged_at': 
'2023-11-16T08:13:28Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6428.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6428'}", + "is_pull_request": true + }, + { + "title": "Release: 2.15.0", + "html_url": "https://github.com/huggingface/datasets/pull/6427", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6427/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6427.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6427', 'merged_at': '2023-11-16T07:43:05Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6427.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6427'}", + "is_pull_request": true + }, + { + "title": "More robust temporary directory deletion", + "html_url": "https://github.com/huggingface/datasets/pull/6426", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6426/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6426.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6426', 'merged_at': '2023-12-01T15:31:19Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6426.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6426'}", + "is_pull_request": true + }, + { + "title": "Fix deprecation warning when building conda package", + "html_url": "https://github.com/huggingface/datasets/pull/6425", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6425/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6425.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6425', 'merged_at': '2023-12-13T14:16:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6425.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6425'}", + "is_pull_request": true + }, + { + "title": "[docs] troubleshooting guide", + "html_url": "https://github.com/huggingface/datasets/pull/6424", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6424/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6424.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6424', 'merged_at': '2023-11-30T17:23:46Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6424.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6424'}", + "is_pull_request": true + }, + { + "title": "Fix conda release by adding pyarrow-hotfix dependency", + "html_url": "https://github.com/huggingface/datasets/pull/6423", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6423/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6423.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6423', 'merged_at': '2023-11-15T17:09:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6423.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6423'}", + "is_pull_request": true + }, + { + "title": "Allow to choose the `writer_batch_size` when using `save_to_disk`", + "html_url": "https://github.com/huggingface/datasets/issues/6422", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6422/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add pyarrow-hotfix to release docs", + "html_url": "https://github.com/huggingface/datasets/pull/6421", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6421/comments", + "labels": "[{'color': 'd4c5f9', 'default': False, 'description': 'Maintenance tasks', 'id': 4296013012, 'name': 'maintenance', 'node_id': 'LA_kwDODunzps8AAAABAA_01A', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/maintenance'}]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6421.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6421', 'merged_at': '2023-11-15T13:38:22Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6421.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6421'}", + "is_pull_request": true + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6420", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6420/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6420.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6420', 'merged_at': '2023-11-15T08:22:33Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6420.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6420'}", + "is_pull_request": true + }, + { + "title": "Release: 2.14.7", + "html_url": "https://github.com/huggingface/datasets/pull/6419", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6419/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6419.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6419', 'merged_at': '2023-11-15T08:12:59Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6419.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6419'}", + "is_pull_request": true + }, + { + "title": "Remove token value from warnings", + "html_url": "https://github.com/huggingface/datasets/pull/6418", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6418/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6418.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6418', 'merged_at': '2023-11-14T22:19:45Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6418.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6418'}", + "is_pull_request": true + }, + { + "title": " Bug: LayoutLMv3 finetuning on FUNSD Notebook; Arrow Error", + "html_url": "https://github.com/huggingface/datasets/issues/6417", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6417/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Rename audio_classificiation.py to audio_classification.py", + "html_url": "https://github.com/huggingface/datasets/pull/6416", + 
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6416/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6416.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6416', 'merged_at': '2023-11-15T11:53:20Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6416.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6416'}", + "is_pull_request": true + }, + { + "title": "Fix multi gpu map example", + "html_url": "https://github.com/huggingface/datasets/pull/6415", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6415/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6415.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6415', 'merged_at': '2023-11-22T15:42:19Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6415.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6415'}", + "is_pull_request": true + }, + { + "title": "Set `usedforsecurity=False` in hashlib methods (FIPS compliance)", + "html_url": "https://github.com/huggingface/datasets/pull/6414", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6414/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6414.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6414', 'merged_at': '2023-11-17T14:17:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6414.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6414'}", + "is_pull_request": true + }, + { + "title": "User token is printed out!", + "html_url": "https://github.com/huggingface/datasets/issues/6412", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6412/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix dependency conflict within CI build documentation", + "html_url": "https://github.com/huggingface/datasets/pull/6411", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6411/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6411.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6411', 'merged_at': '2023-11-14T10:05:34Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6411.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6411'}", + "is_pull_request": true + }, + { + "title": "Datasets does not load HuggingFace Repository properly", + "html_url": "https://github.com/huggingface/datasets/issues/6410", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6410/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "using DownloadManager to download from local filesystem and disable_progress_bar, there will be an exception ", + "html_url": "https://github.com/huggingface/datasets/issues/6409", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6409/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`IterableDataset` lost but not keep columns when map function adding columns with names in `remove_columns`", + "html_url": 
"https://github.com/huggingface/datasets/issues/6408", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6408/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Loading the dataset from private S3 bucket gives \"TypeError: cannot pickle '_contextvars.Context' object\"", + "html_url": "https://github.com/huggingface/datasets/issues/6407", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6407/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "CI Build PR Documentation is broken: ImportError: cannot import name 'TypeAliasType' from 'typing_extensions'", + "html_url": "https://github.com/huggingface/datasets/issues/6406", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6406/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ConfigNamesError on a simple CSV file", + "html_url": "https://github.com/huggingface/datasets/issues/6405", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6405/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support pyarrow 14.0.1 and fix vulnerability CVE-2023-47248", + "html_url": "https://github.com/huggingface/datasets/pull/6404", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6404/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6404.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6404', 'merged_at': '2023-11-14T10:23:29Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6404.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6404'}", + "is_pull_request": true + }, + { + "title": "Cannot import datasets on google colab (python 3.10.12)", + "html_url": "https://github.com/huggingface/datasets/issues/6403", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6403/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Update torch_formatter.py", + "html_url": "https://github.com/huggingface/datasets/pull/6402", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6402/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6402.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6402', 'merged_at': '2024-03-15T11:25:36Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6402.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6402'}", + "is_pull_request": true + }, + { + "title": "dataset = load_dataset(\"Hyperspace-Technologies/scp-wiki-text\") not working", + "html_url": "https://github.com/huggingface/datasets/issues/6401", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6401/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Safely load datasets by disabling execution of dataset 
loading script", + "html_url": "https://github.com/huggingface/datasets/issues/6400", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6400/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "TypeError: Cannot convert pyarrow.lib.ChunkedArray to pyarrow.lib.Array", + "html_url": "https://github.com/huggingface/datasets/issues/6399", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6399/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove redundant condition in builders", + "html_url": "https://github.com/huggingface/datasets/pull/6398", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6398/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6398.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6398', 'merged_at': '2023-11-14T10:43:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6398.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6398'}", + "is_pull_request": true + }, + { + "title": "Raise a different exception for inexisting dataset vs files without known extension", + "html_url": "https://github.com/huggingface/datasets/issues/6397", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6397/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Issue with pyarrow 14.0.1", + "html_url": "https://github.com/huggingface/datasets/issues/6396", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6396/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add ability to set lock type", + "html_url": "https://github.com/huggingface/datasets/issues/6395", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6395/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "TorchFormatter images (H, W, C) instead of (C, H, W) format", + "html_url": "https://github.com/huggingface/datasets/issues/6394", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6394/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Filter occasionally hangs", + "html_url": "https://github.com/huggingface/datasets/issues/6393", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6393/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`push_to_hub` is not robust to hub closing connection", + "html_url": "https://github.com/huggingface/datasets/issues/6392", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6392/comments", + 
"labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Webdataset dataset builder", + "html_url": "https://github.com/huggingface/datasets/pull/6391", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6391/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6391.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6391', 'merged_at': '2023-11-28T16:33:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6391.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6391'}", + "is_pull_request": true + }, + { + "title": "handle future deprecation argument", + "html_url": "https://github.com/huggingface/datasets/pull/6390", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6390/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6390.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6390', 'merged_at': '2023-11-14T15:15:59Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6390.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6390'}", + "is_pull_request": true + }, + { + "title": "Index 339 out of range for dataset of size 339 <-- save_to_file()", + "html_url": "https://github.com/huggingface/datasets/issues/6389", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6389/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "How to create 3d medical imgae dataset?", + "html_url": "https://github.com/huggingface/datasets/issues/6388", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6388/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "How to load existing downloaded dataset ?", + "html_url": "https://github.com/huggingface/datasets/issues/6387", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6387/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Formatting overhead", + "html_url": "https://github.com/huggingface/datasets/issues/6386", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6386/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Get an error when i try to concatenate the squad dataset with my own dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6385", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6385/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Load the local dataset folder from other place", + "html_url": "https://github.com/huggingface/datasets/issues/6384", + 
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6384/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "imagenet-1k downloads over and over", + "html_url": "https://github.com/huggingface/datasets/issues/6383", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6383/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add CheXpert dataset for vision", + "html_url": "https://github.com/huggingface/datasets/issues/6382", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6382/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}\n {'color': 'e99695', 'default': False, 'description': 'Requesting to add a new dataset', 'id': 2067376369, 'name': 'dataset request', 'node_id': 'MDU6TGFiZWwyMDY3Mzc2MzY5', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/dataset%20request'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add my dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6381", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6381/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6381.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6381', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6381.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6381'}", + "is_pull_request": true + }, + { + "title": "Fix for continuation behaviour on broken dataset archives due to starving download connections via HTTP-GET", + "html_url": "https://github.com/huggingface/datasets/pull/6380", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6380/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6380.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6380', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6380.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6380'}", + "is_pull_request": true + }, + { + "title": "Avoid redundant warning when encoding NumPy array as `Image`", + "html_url": "https://github.com/huggingface/datasets/pull/6379", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6379/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6379.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6379', 'merged_at': '2023-11-02T17:08:07Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6379.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6379'}", + "is_pull_request": true + }, + { + "title": "Support pyarrow 14.0.0", + "html_url": "https://github.com/huggingface/datasets/pull/6378", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6378/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6378.diff', 'html_url': 
'https://github.com/huggingface/datasets/pull/6378', 'merged_at': '2023-11-02T15:15:44Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6378.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6378'}", + "is_pull_request": true + }, + { + "title": "Support pyarrow 14.0.0", + "html_url": "https://github.com/huggingface/datasets/issues/6377", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6377/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Caching problem when deleting a dataset ", + "html_url": "https://github.com/huggingface/datasets/issues/6376", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6376/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Temporarily pin pyarrow < 14.0.0", + "html_url": "https://github.com/huggingface/datasets/pull/6375", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6375/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6375.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6375', 'merged_at': '2023-11-02T10:11:19Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6375.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6375'}", + "is_pull_request": true + }, + { + "title": "CI is broken: TypeError: Couldn't cast array", + "html_url": "https://github.com/huggingface/datasets/issues/6374", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6374/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix typo in `Dataset.map` docstring", + "html_url": "https://github.com/huggingface/datasets/pull/6373", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6373/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6373.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6373', 'merged_at': '2023-11-02T10:11:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6373.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6373'}", + "is_pull_request": true + }, + { + "title": "do not try to download from HF GCS for generator", + "html_url": "https://github.com/huggingface/datasets/pull/6372", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6372/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6372.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6372', 'merged_at': '2023-11-02T15:52:09Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6372.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6372'}", + "is_pull_request": true + }, + { + "title": "`Dataset.from_generator` should not try to download from HF GCS", + "html_url": "https://github.com/huggingface/datasets/issues/6371", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6371/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "TensorDataset format does not work with Trainer from transformers", + "html_url": 
"https://github.com/huggingface/datasets/issues/6370", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6370/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Multi process map did not load cache file correctly", + "html_url": "https://github.com/huggingface/datasets/issues/6369", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6369/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix python formatting for complex types in `format_table`", + "html_url": "https://github.com/huggingface/datasets/pull/6368", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6368/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6368.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6368', 'merged_at': '2023-11-02T14:21:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6368.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6368'}", + "is_pull_request": true + }, + { + "title": "Fix time measuring snippet in docs", + "html_url": "https://github.com/huggingface/datasets/pull/6367", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6367/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6367.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6367', 'merged_at': '2023-10-31T18:24:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6367.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6367'}", + "is_pull_request": true + }, + { + "title": "with_format() function returns bytes instead of PIL images even when image column is not part of \"columns\"", + "html_url": "https://github.com/huggingface/datasets/issues/6366", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6366/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Parquet size grows exponential for categorical data", + "html_url": "https://github.com/huggingface/datasets/issues/6365", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6365/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ArrowNotImplementedError: Unsupported cast from string to list using function cast_list", + "html_url": "https://github.com/huggingface/datasets/issues/6364", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6364/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "dataset.transform() hangs indefinitely while finetuning the stable diffusion XL", + "html_url": "https://github.com/huggingface/datasets/issues/6363", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6363/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Simplify filesystem logic", + "html_url": "https://github.com/huggingface/datasets/pull/6362", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6362/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 
'https://github.com/huggingface/datasets/pull/6362.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6362', 'merged_at': '2023-11-15T14:02:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6362.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6362'}", + "is_pull_request": true + }, + { + "title": " Add support for `Sequence(Audio/Image)` feature in `push_to_hub`", + "html_url": "https://github.com/huggingface/datasets/issues/6360", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6360/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Stuck in \"Resolving data files...\"", + "html_url": "https://github.com/huggingface/datasets/issues/6359", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6359/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Mounting datasets cache fails due to absolute paths.", + "html_url": "https://github.com/huggingface/datasets/issues/6358", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6358/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Allow passing a multiprocessing context to functions that support `num_proc`", + "html_url": "https://github.com/huggingface/datasets/issues/6357", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6357/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add `fsspec` version to the `datasets-cli env` command output", + "html_url": "https://github.com/huggingface/datasets/pull/6356", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6356/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6356.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6356', 'merged_at': '2023-10-26T18:32:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6356.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6356'}", + "is_pull_request": true + }, + { + "title": "More hub centric docs", + "html_url": "https://github.com/huggingface/datasets/pull/6355", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6355/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6355.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6355', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6355.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6355'}", + "is_pull_request": true + }, + { + "title": "`IterableDataset.from_spark` does not support multiple workers in pytorch `Dataloader`", + "html_url": "https://github.com/huggingface/datasets/issues/6354", + 
"comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6354/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset save_to_disk load_from_disk error", + "html_url": "https://github.com/huggingface/datasets/issues/6353", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6353/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Error loading wikitext data raise NotImplementedError(f\"Loading a dataset cached in a {type(self._fs).__name__} is not supported.\")", + "html_url": "https://github.com/huggingface/datasets/issues/6352", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6352/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix use_dataset.mdx", + "html_url": "https://github.com/huggingface/datasets/pull/6351", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6351/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6351.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6351', 'merged_at': '2023-10-26T17:10:27Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6351.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6351'}", + "is_pull_request": true + }, + { + "title": "Different objects are returned from calls that should be returning the same kind of object.", + "html_url": "https://github.com/huggingface/datasets/issues/6350", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6350/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Can't load ds = load_dataset(\"imdb\")", + "html_url": "https://github.com/huggingface/datasets/issues/6349", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6349/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Parquet stream-conversion fails to embed images/audio files from gated repos", + "html_url": "https://github.com/huggingface/datasets/issues/6348", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6348/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Incorrect example code in 'Create a dataset' docs", + "html_url": "https://github.com/huggingface/datasets/issues/6347", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6347/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix UnboundLocalError if preprocessing returns an empty list", + "html_url": "https://github.com/huggingface/datasets/pull/6346", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6346/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6346.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6346', 'merged_at': 
'2023-10-25T16:36:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6346.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6346'}", + "is_pull_request": true + }, + { + "title": "support squad structure datasets using a YAML parameter", + "html_url": "https://github.com/huggingface/datasets/issues/6345", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6345/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6344", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6344/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6344.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6344', 'merged_at': '2023-10-23T15:13:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6344.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6344'}", + "is_pull_request": true + }, + { + "title": "Remove unused argument in `_get_data_files_patterns`", + "html_url": "https://github.com/huggingface/datasets/pull/6343", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6343/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6343.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6343', 'merged_at': '2023-11-16T09:03:39Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6343.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6343'}", + "is_pull_request": true + }, + { + "title": "Release: 2.14.6", + "html_url": "https://github.com/huggingface/datasets/pull/6342", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6342/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6342.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6342', 'merged_at': '2023-10-23T15:07:25Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6342.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6342'}", + "is_pull_request": true + }, + { + "title": "Release 2.14.5", + "html_url": "https://github.com/huggingface/datasets/pull/6340", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6340/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6340.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6340', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6340.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6340'}", + "is_pull_request": true + }, + { + "title": "minor release step improvement", + "html_url": "https://github.com/huggingface/datasets/pull/6339", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6339/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6339.diff', 'html_url': 
'https://github.com/huggingface/datasets/pull/6339', 'merged_at': '2023-11-07T10:32:41Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6339.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6339'}", + "is_pull_request": true + }, + { + "title": "pin fsspec before it switches to glob.glob", + "html_url": "https://github.com/huggingface/datasets/pull/6338", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6338/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6338.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6338', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6338.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6338'}", + "is_pull_request": true + }, + { + "title": "Pin supported upper version of fsspec", + "html_url": "https://github.com/huggingface/datasets/pull/6337", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6337/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6337.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6337', 'merged_at': '2023-10-23T12:04:36Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6337.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6337'}", + "is_pull_request": true + }, + { + "title": "unpin-fsspec", + "html_url": "https://github.com/huggingface/datasets/pull/6336", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6336/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6336.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6336', 'merged_at': '2023-10-23T10:17:48Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6336.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6336'}", + "is_pull_request": true + }, + { + "title": "Support fsspec 2023.10.0", + "html_url": "https://github.com/huggingface/datasets/pull/6335", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6335/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6335.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6335', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6335.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6335'}", + "is_pull_request": true + }, + { + "title": "datasets.filesystems: fix is_remote_filesystems", + "html_url": "https://github.com/huggingface/datasets/pull/6334", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6334/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6334.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6334', 'merged_at': '2023-10-23T10:14:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6334.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6334'}", + "is_pull_request": true + }, + { + "title": "Support fsspec 2023.10.0", + "html_url": "https://github.com/huggingface/datasets/issues/6333", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6333/comments", + "labels": "[]", + "state": 
"closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Replace deprecated license_file in setup.cfg", + "html_url": "https://github.com/huggingface/datasets/pull/6332", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6332/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6332.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6332', 'merged_at': '2023-11-07T08:09:06Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6332.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6332'}", + "is_pull_request": true + }, + { + "title": "Temporarily pin fsspec < 2023.10.0", + "html_url": "https://github.com/huggingface/datasets/pull/6331", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6331/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6331.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6331', 'merged_at': '2023-10-23T09:17:55Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6331.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6331'}", + "is_pull_request": true + }, + { + "title": "Latest fsspec==2023.10.0 issue with streaming datasets", + "html_url": "https://github.com/huggingface/datasets/issues/6330", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6330/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "\u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u0645\u062a\u0646 \u0628\u0647 \u06af\u0641\u062a\u0627\u0631 \u0627\u0628\u062a\u062f\u0627 \u0645\u062a\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0631\u0627 \u0628\u0647 \u0628\u0627\u0632\u0646\u0645\u0627\u06cc\u06cc \u0645\u06cc\u0627\u0646\u06cc", + "html_url": "https://github.com/huggingface/datasets/issues/6329", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6329/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "\u0634\u0628\u06a9\u0647 \u0647\u0627\u06cc \u0645\u062a\u0646 \u0628\u0647 \u06af\u0641\u062a\u0627\u0631 \u0627\u0628\u062a\u062f\u0627 \u0645\u062a\u0646 \u062f\u0627\u062f\u0647 \u0634\u062f\u0647 \u0631\u0627 \u0628\u0647 \u0628\u0627\u0632\u0646\u0645\u0627\u06cc\u06cc \u0645\u06cc\u0627\u0646\u06cc", + "html_url": "https://github.com/huggingface/datasets/issues/6328", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6328/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "FileNotFoundError when trying to load the downloaded dataset with `load_dataset(..., streaming=True)`", + "html_url": "https://github.com/huggingface/datasets/issues/6327", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6327/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Create battery_analysis.py", + "html_url": "https://github.com/huggingface/datasets/pull/6326", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6326/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6326.diff', 'html_url': 
'https://github.com/huggingface/datasets/pull/6326', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6326.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6326'}", + "is_pull_request": true + }, + { + "title": "Create battery_analysis.py", + "html_url": "https://github.com/huggingface/datasets/pull/6325", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6325/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6325.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6325', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6325.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6325'}", + "is_pull_request": true + }, + { + "title": "Conversion to Arrow fails due to wrong type heuristic", + "html_url": "https://github.com/huggingface/datasets/issues/6324", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6324/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Loading dataset from large GCS bucket very slow since 2.14", + "html_url": "https://github.com/huggingface/datasets/issues/6323", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6323/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix regex `get_data_files` formatting for base paths", + "html_url": "https://github.com/huggingface/datasets/pull/6322", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6322/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6322.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6322', 'merged_at': '2023-10-23T14:31:21Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6322.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6322'}", + "is_pull_request": true + }, + { + "title": "Fix typos", + "html_url": "https://github.com/huggingface/datasets/pull/6321", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6321/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6321.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6321', 'merged_at': '2023-10-19T17:07:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6321.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6321'}", + "is_pull_request": true + }, + { + "title": "Dataset slice splits can't load training and validation at the same time", + "html_url": "https://github.com/huggingface/datasets/issues/6320", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6320/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Datasets.map is severely broken", + "html_url": "https://github.com/huggingface/datasets/issues/6319", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6319/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Deterministic set hash", + "html_url": "https://github.com/huggingface/datasets/pull/6318", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6318/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6318.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6318', 'merged_at': '2023-10-19T16:16:31Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6318.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6318'}", + "is_pull_request": true + }, + { + "title": "sentiment140 dataset unavailable", + "html_url": "https://github.com/huggingface/datasets/issues/6317", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6317/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix loading Hub datasets with CSV metadata file", + "html_url": "https://github.com/huggingface/datasets/pull/6316", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6316/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6316.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6316', 'merged_at': '2023-10-20T06:14:09Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6316.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6316'}", + "is_pull_request": true + }, + { + "title": "Hub datasets with CSV metadata raise ArrowInvalid: JSON parse error: Invalid value. in row 0", + "html_url": "https://github.com/huggingface/datasets/issues/6315", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6315/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support creating new branch in push_to_hub", + "html_url": "https://github.com/huggingface/datasets/pull/6314", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6314/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6314.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6314', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6314.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6314'}", + "is_pull_request": true + }, + { + "title": "Fix commit message formatting in multi-commit uploads", + "html_url": "https://github.com/huggingface/datasets/pull/6313", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6313/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6313.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6313', 'merged_at': '2023-10-20T13:57:38Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6313.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6313'}", + "is_pull_request": true + }, + { + "title": "docs: resolving namespace conflict, refactored variable ", + "html_url": "https://github.com/huggingface/datasets/pull/6312", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6312/comments", + "labels": "[]", + "state": "closed", + "pull_request": 
"{'diff_url': 'https://github.com/huggingface/datasets/pull/6312.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6312', 'merged_at': '2023-10-19T16:23:07Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6312.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6312'}", + "is_pull_request": true + }, + { + "title": "cast_column to Sequence with length=4 occur exception raise in datasets/table.py:2146", + "html_url": "https://github.com/huggingface/datasets/issues/6311", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6311/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add return_file_name in load_dataset", + "html_url": "https://github.com/huggingface/datasets/pull/6310", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6310/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6310.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6310', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6310.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6310'}", + "is_pull_request": true + }, + { + "title": "Fix get_data_patterns for directories with the word data twice", + "html_url": "https://github.com/huggingface/datasets/pull/6309", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6309/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6309.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6309', 'merged_at': '2023-10-18T13:50:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6309.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6309'}", + "is_pull_request": true + }, + { + "title": "module 'resource' has no attribute 'error'", + "html_url": "https://github.com/huggingface/datasets/issues/6308", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6308/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix typo in code example in docs", + "html_url": "https://github.com/huggingface/datasets/pull/6307", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6307/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6307.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6307', 'merged_at': '2023-10-17T06:36:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6307.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6307'}", + "is_pull_request": true + }, + { + "title": "pyinstaller : OSError: could not get source code", + "html_url": "https://github.com/huggingface/datasets/issues/6306", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6306/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Cannot load dataset with `2.14.5`: `FileNotFound` error", + "html_url": "https://github.com/huggingface/datasets/issues/6305", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6305/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + 
"title": "Update README.md", + "html_url": "https://github.com/huggingface/datasets/pull/6304", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6304/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6304.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6304', 'merged_at': '2023-10-17T15:04:52Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6304.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6304'}", + "is_pull_request": true + }, + { + "title": "Parquet uploads off-by-one naming scheme", + "html_url": "https://github.com/huggingface/datasets/issues/6303", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6303/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ArrowWriter/ParquetWriter `write` method does not increase `_num_bytes` and hence datasets not sharding at `max_shard_size`", + "html_url": "https://github.com/huggingface/datasets/issues/6302", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6302/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Unpin `tensorflow` maximum version", + "html_url": "https://github.com/huggingface/datasets/pull/6301", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6301/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6301.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6301', 'merged_at': '2023-10-12T15:49:54Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6301.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6301'}", + "is_pull_request": true + }, + { + "title": "Unpin `jax` maximum version", + "html_url": "https://github.com/huggingface/datasets/pull/6300", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6300/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6300.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6300', 'merged_at': '2023-10-12T16:28:57Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6300.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6300'}", + "is_pull_request": true + }, + { + "title": "Support for newer versions of JAX", + "html_url": "https://github.com/huggingface/datasets/issues/6299", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6299/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Doc readme improvements", + "html_url": "https://github.com/huggingface/datasets/pull/6298", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6298/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6298.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6298', 'merged_at': '2023-10-12T12:38:19Z', 'patch_url': 
'https://github.com/huggingface/datasets/pull/6298.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6298'}", + "is_pull_request": true + }, + { + "title": "Fix ArrayXD cast", + "html_url": "https://github.com/huggingface/datasets/pull/6297", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6297/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6297.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6297', 'merged_at': '2023-10-13T13:45:30Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6297.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6297'}", + "is_pull_request": true + }, + { + "title": "Move `exceptions.py` to `utils/exceptions.py`", + "html_url": "https://github.com/huggingface/datasets/pull/6296", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6296/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6296.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6296', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6296.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6296'}", + "is_pull_request": true + }, + { + "title": "Fix parquet columns argument in streaming mode", + "html_url": "https://github.com/huggingface/datasets/pull/6295", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6295/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6295.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6295', 'merged_at': '2023-10-11T16:21:36Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6295.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6295'}", + "is_pull_request": true + }, + { + "title": "IndexError: Invalid key is out of bounds for size 0 despite having a populated dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6294", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6294/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Choose columns to stream parquet data in streaming mode", + "html_url": "https://github.com/huggingface/datasets/issues/6293", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6293/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "how to load the image of dtype float32 or float64", + "html_url": "https://github.com/huggingface/datasets/issues/6292", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6292/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Casting type from Array2D int to Array2D float crashes", + "html_url": "https://github.com/huggingface/datasets/issues/6291", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6291/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "Incremental dataset (e.g. `.push_to_hub(..., append=True)`)", + "html_url": "https://github.com/huggingface/datasets/issues/6290", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6290/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "testing doc-builder", + "html_url": "https://github.com/huggingface/datasets/pull/6289", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6289/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6289.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6289', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6289.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6289'}", + "is_pull_request": true + }, + { + "title": "Dataset.from_pandas with a DataFrame of PIL.Images", + "html_url": "https://github.com/huggingface/datasets/issues/6288", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6288/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "map() not recognizing \"text\"", + "html_url": "https://github.com/huggingface/datasets/issues/6287", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6287/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Create DefunctDatasetError", + "html_url": "https://github.com/huggingface/datasets/pull/6286", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6286/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6286.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6286', 'merged_at': '2023-10-10T07:03:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6286.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6286'}", + "is_pull_request": true + }, + { + "title": "TypeError: expected str, bytes or os.PathLike object, not dict", + "html_url": "https://github.com/huggingface/datasets/issues/6285", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6285/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add Belebele multiple-choice machine reading comprehension (MRC) dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6284", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6284/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "Fix array cast/embed with null values", + "html_url": "https://github.com/huggingface/datasets/pull/6283", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6283/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6283.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6283', 'merged_at': '2024-02-06T19:24:18Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6283.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6283'}", + "is_pull_request": true + }, + { + "title": "Drop data_files duplicates", + "html_url": "https://github.com/huggingface/datasets/pull/6282", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6282/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6282.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6282', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6282.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6282'}", + "is_pull_request": true + }, + { + "title": "Improve documentation of dataset.from_generator", + "html_url": "https://github.com/huggingface/datasets/pull/6281", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6281/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6281.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6281', 'merged_at': '2023-10-05T18:57:41Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6281.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6281'}", + "is_pull_request": true + }, + { + "title": "Couldn't cast array of type fixed_size_list to Sequence(Value(float64))", + "html_url": "https://github.com/huggingface/datasets/issues/6280", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6280/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Batched IterableDataset", + "html_url": "https://github.com/huggingface/datasets/issues/6279", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6279/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "No data files duplicates", + "html_url": "https://github.com/huggingface/datasets/pull/6278", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6278/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6278.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6278', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6278.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6278'}", + "is_pull_request": true + }, + { + "title": "FileNotFoundError: Couldn't find a module script at /content/paws-x/paws-x.py. 
Module 'paws-x' doesn't exist on the Hugging Face Hub either.", + "html_url": "https://github.com/huggingface/datasets/issues/6277", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6277/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "I'm trying to fine tune the openai/whisper model from huggingface using jupyter notebook and i keep getting this error", + "html_url": "https://github.com/huggingface/datasets/issues/6276", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6276/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Would like to Contribute a dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6275", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6275/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "FileNotFoundError for dataset with multiple builder config", + "html_url": "https://github.com/huggingface/datasets/issues/6274", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6274/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Broken Link to PubMed Abstracts dataset .", + "html_url": "https://github.com/huggingface/datasets/issues/6273", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6273/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Duplicate `data_files` when named `/.parquet`", + "html_url": "https://github.com/huggingface/datasets/issues/6272", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6272/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Overwriting Split overwrites data but not metadata, corrupting dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6271", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6271/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Dataset.from_generator raises with sharded gen_args", + "html_url": "https://github.com/huggingface/datasets/issues/6270", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6270/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Reduce the number of commits in `push_to_hub`", + "html_url": "https://github.com/huggingface/datasets/pull/6269", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6269/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6269.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6269', 'merged_at': '2023-10-16T13:30:46Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6269.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6269'}", + "is_pull_request": true + }, + { + "title": "Add repo_id to 
DatasetInfo", + "html_url": "https://github.com/huggingface/datasets/pull/6268", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6268/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6268.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6268', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6268.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6268'}", + "is_pull_request": true + }, + { + "title": "Multi label class encoding", + "html_url": "https://github.com/huggingface/datasets/issues/6267", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6267/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Use LibYAML with PyYAML if available", + "html_url": "https://github.com/huggingface/datasets/pull/6266", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6266/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6266.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6266', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6266.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6266'}", + "is_pull_request": true + }, + { + "title": "Remove `apache_beam` import in `BeamBasedBuilder._save_info`", + "html_url": "https://github.com/huggingface/datasets/pull/6265", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6265/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6265.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6265', 'merged_at': '2023-09-28T18:23:35Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6265.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6265'}", + "is_pull_request": true + }, + { + "title": "Temporarily pin tensorflow < 2.14.0", + "html_url": "https://github.com/huggingface/datasets/pull/6264", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6264/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6264.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6264', 'merged_at': '2023-09-27T08:36:39Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6264.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6264'}", + "is_pull_request": true + }, + { + "title": "CI is broken: ImportError: cannot import name 'context' from 'tensorflow.python'", + "html_url": "https://github.com/huggingface/datasets/issues/6263", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6263/comments", + "labels": "[{'color': 'd73a4a', 'default': True, 'description': \"Something isn't working\", 'id': 1935892857, 'name': 'bug', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODU3', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/bug'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + 
"title": "Fix CI 404 errors", + "html_url": "https://github.com/huggingface/datasets/pull/6262", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6262/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6262.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6262', 'merged_at': '2023-09-28T15:30:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6262.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6262'}", + "is_pull_request": true + }, + { + "title": "Can't load a dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6261", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6261/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "REUSE_DATASET_IF_EXISTS don't work ", + "html_url": "https://github.com/huggingface/datasets/issues/6260", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6260/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Duplicated Rows When Loading Parquet Files from Root Directory with Subdirectories", + "html_url": "https://github.com/huggingface/datasets/issues/6259", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6259/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "[DOCS] Fix typo: Elasticsearch", + "html_url": "https://github.com/huggingface/datasets/pull/6258", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6258/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6258.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6258', 'merged_at': '2023-09-26T13:36:40Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6258.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6258'}", + "is_pull_request": true + }, + { + "title": "HfHubHTTPError - exceeded our hourly quotas for action: commit", + "html_url": "https://github.com/huggingface/datasets/issues/6257", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6257/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "load_dataset() function's cache_dir does not seems to work", + "html_url": "https://github.com/huggingface/datasets/issues/6256", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6256/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Parallelize builder configs creation", + "html_url": "https://github.com/huggingface/datasets/pull/6255", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6255/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6255.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6255', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6255.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6255'}", + "is_pull_request": true + }, + { + "title": "Dataset.from_generator() cost much more time in vscode debugging mode then running mode", 
+ "html_url": "https://github.com/huggingface/datasets/issues/6254", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6254/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Check builder cls default config name in inspect", + "html_url": "https://github.com/huggingface/datasets/pull/6253", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6253/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6253.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6253', 'merged_at': '2023-09-21T14:08:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6253.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6253'}", + "is_pull_request": true + }, + { + "title": "exif_transpose not done to Image (PIL problem)", + "html_url": "https://github.com/huggingface/datasets/issues/6252", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6252/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support streaming datasets with pyarrow.parquet.read_table", + "html_url": "https://github.com/huggingface/datasets/pull/6251", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6251/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6251.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6251', 'merged_at': '2023-09-27T06:26:24Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6251.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6251'}", + "is_pull_request": true + }, + { + "title": "Update create_dataset.mdx", + "html_url": "https://github.com/huggingface/datasets/pull/6247", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6247/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6247.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6247', 'merged_at': '2023-09-19T18:40:10Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6247.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6247'}", + "is_pull_request": true + }, + { + "title": "Add new column to dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6246", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6246/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Add support for `fsspec>=2023.9.0`", + "html_url": "https://github.com/huggingface/datasets/pull/6244", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6244/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6244.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6244', 'merged_at': '2023-09-26T15:32:51Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6244.patch', 'url': 
'https://api.github.com/repos/huggingface/datasets/pulls/6244'}", + "is_pull_request": true + }, + { + "title": "Fix cast from fixed size list to variable size list", + "html_url": "https://github.com/huggingface/datasets/pull/6243", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6243/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6243.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6243', 'merged_at': '2023-09-19T17:53:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6243.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6243'}", + "is_pull_request": true + }, + { + "title": "Data alteration when loading dataset with unspecified inner sequence length", + "html_url": "https://github.com/huggingface/datasets/issues/6242", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6242/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove unused global variables in `audio.py`", + "html_url": "https://github.com/huggingface/datasets/pull/6241", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6241/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6241.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6241', 'merged_at': '2023-09-15T15:46:07Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6241.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6241'}", + "is_pull_request": true + }, + { + "title": "Dataloader stuck on multiple GPUs", + "html_url": "https://github.com/huggingface/datasets/issues/6240", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6240/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Load local audio data doesn't work", + "html_url": "https://github.com/huggingface/datasets/issues/6239", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6239/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "`dataset.filter` ALWAYS removes the first item from the dataset when using batched=True", + "html_url": "https://github.com/huggingface/datasets/issues/6238", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6238/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Tokenization with multiple workers is too slow", + "html_url": "https://github.com/huggingface/datasets/issues/6237", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6237/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Support buffer shuffle for to_tf_dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6236", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6236/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + 
"is_pull_request": false + }, + { + "title": "Support multiprocessing for download/extract nestedly", + "html_url": "https://github.com/huggingface/datasets/issues/6235", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6235/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Update README.md", + "html_url": "https://github.com/huggingface/datasets/pull/6233", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6233/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6233.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6233', 'merged_at': '2023-09-13T18:10:04Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6233.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6233'}", + "is_pull_request": true + }, + { + "title": "Improve error message for missing function parameters", + "html_url": "https://github.com/huggingface/datasets/pull/6232", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6232/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6232.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6232', 'merged_at': '2023-09-15T17:59:02Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6232.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6232'}", + "is_pull_request": true + }, + { + "title": "Overwrite legacy default config name in `dataset_infos.json` in packaged datasets", + "html_url": "https://github.com/huggingface/datasets/pull/6231", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6231/comments", + "labels": "[]", + "state": "open", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6231.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6231', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6231.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6231'}", + "is_pull_request": true + }, + { + "title": "Don't skip hidden files in `dl_manager.iter_files` when they are given as input", + "html_url": "https://github.com/huggingface/datasets/pull/6230", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6230/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6230.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6230', 'merged_at': '2023-09-13T18:12:09Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6230.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6230'}", + "is_pull_request": true + }, + { + "title": "Apply inference on all images in the dataset", + "html_url": "https://github.com/huggingface/datasets/issues/6229", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6229/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Remove RGB -> BGR image conversion in Object Detection tutorial", + 
"html_url": "https://github.com/huggingface/datasets/pull/6228", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6228/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6228.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6228', 'merged_at': '2023-09-08T17:52:16Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6228.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6228'}", + "is_pull_request": true + }, + { + "title": "Add push_to_hub with multiple configs docs", + "html_url": "https://github.com/huggingface/datasets/pull/6226", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6226/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6226.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6226', 'merged_at': '2023-09-08T12:20:51Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6226.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6226'}", + "is_pull_request": true + }, + { + "title": "Conversion from RGB to BGR in Object Detection tutorial", + "html_url": "https://github.com/huggingface/datasets/issues/6225", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6225/comments", + "labels": "[]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Ignore `dataset_info.json` in data files resolution", + "html_url": "https://github.com/huggingface/datasets/pull/6224", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6224/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6224.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6224', 'merged_at': '2023-09-07T15:37:20Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6224.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6224'}", + "is_pull_request": true + }, + { + "title": "Update README.md", + "html_url": "https://github.com/huggingface/datasets/pull/6223", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6223/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6223.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6223', 'merged_at': '2023-09-13T22:23:42Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6223.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6223'}", + "is_pull_request": true + }, + { + "title": "fix typo in Audio dataset documentation", + "html_url": "https://github.com/huggingface/datasets/pull/6222", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6222/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6222.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6222', 'merged_at': '2023-09-07T15:39:09Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6222.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6222'}", + "is_pull_request": true + }, + { + "title": "Support saving datasets with custom formatting", + "html_url": "https://github.com/huggingface/datasets/issues/6221", + "comments_url": 
"https://api.github.com/repos/huggingface/datasets/issues/6221/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Set dev version", + "html_url": "https://github.com/huggingface/datasets/pull/6220", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6220/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6220.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6220', 'merged_at': '2023-09-06T15:41:13Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6220.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6220'}", + "is_pull_request": true + }, + { + "title": "Release: 2.14.5", + "html_url": "https://github.com/huggingface/datasets/pull/6219", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6219/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6219.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6219', 'merged_at': '2023-09-06T15:18:51Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6219.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6219'}", + "is_pull_request": true + }, + { + "title": "Rename old push_to_hub configs to \"default\" in dataset_infos", + "html_url": "https://github.com/huggingface/datasets/pull/6218", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6218/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6218.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6218', 'merged_at': '2023-09-06T11:23:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6218.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6218'}", + "is_pull_request": true + }, + { + "title": "`Dataset.to_dict()` ignore `decode=True` with Image feature", + "html_url": "https://github.com/huggingface/datasets/issues/6217", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6217/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Release: 2.13.2", + "html_url": "https://github.com/huggingface/datasets/pull/6216", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6216/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6216.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6216', 'merged_at': '2023-09-06T08:22:43Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6216.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6216'}", + "is_pull_request": true + }, + { + "title": "Fix checking patterns to infer packaged builder ", + "html_url": "https://github.com/huggingface/datasets/pull/6215", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6215/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6215.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6215', 'merged_at': '2023-09-06T10:25:00Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6215.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6215'}", + 
"is_pull_request": true + }, + { + "title": "Unpin fsspec < 2023.9.0", + "html_url": "https://github.com/huggingface/datasets/issues/6214", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6214/comments", + "labels": "[{'color': 'a2eeef', 'default': True, 'description': 'New feature or request', 'id': 1935892871, 'name': 'enhancement', 'node_id': 'MDU6TGFiZWwxOTM1ODkyODcx', 'url': 'https://api.github.com/repos/huggingface/datasets/labels/enhancement'}]", + "state": "closed", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Better list array values handling in cast/embed storage", + "html_url": "https://github.com/huggingface/datasets/pull/6213", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6213/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6213.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6213', 'merged_at': None, 'patch_url': 'https://github.com/huggingface/datasets/pull/6213.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6213'}", + "is_pull_request": true + }, + { + "title": "Tilde (~) is not supported for data_files", + "html_url": "https://github.com/huggingface/datasets/issues/6212", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6212/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "Fix empty splitinfo json", + "html_url": "https://github.com/huggingface/datasets/pull/6211", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/6211/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/6211.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/6211', 'merged_at': '2023-09-04T14:47:17Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/6211.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/6211'}", + "is_pull_request": true + } +] \ No newline at end of file diff --git a/examples/data/request_issue.py b/examples/data/request_issue.py new file mode 100644 index 00000000..06e4e6d4 --- /dev/null +++ b/examples/data/request_issue.py @@ -0,0 +1,49 @@ +import json +import requests + +# 从 JSON 文件读取数据 +with open('test_data.json', 'r') as f: + data = json.load(f) + +# 用于保存所有提取的数据 +all_extracted_data = [] + +# 遍历每个条目 +for item in data: + title = item['title'] + comments_url = item['comments_url'] + + # 从评论 URL 获取评论数据 + response = requests.get(comments_url) + + if response.status_code == 200: + comments = response.json() + + # 提取评论数据 + extracted_data = { + "title": title, + } + + for i, comment in enumerate(comments): + entry = { + "user_login": comment["user"]["login"], + "created_at": comment["created_at"], + "updated_at": comment["updated_at"], + "body": comment["body"] + } + + if i == 0: + extracted_data.update(entry) # 第一条评论直接加入 + else: + extracted_data[f"answer_{i}"] = entry # 后续评论作为回答 + + # 添加到总提取数据中 + all_extracted_data.append(extracted_data) + else: + print(f"请求失败,状态码: {response.status_code},对于标题: {title}") + +# 保存提取的数据到新的 JSON 文件 +with open('extracted_data.json', 'w') as f: + json.dump(all_extracted_data, f, indent=4) + +print("所有数据已提取并保存到 extracted_data.json") diff --git a/examples/data/test_data.json b/examples/data/test_data.json new file mode 100644 index 00000000..a7c0161b --- /dev/null +++ b/examples/data/test_data.json @@ -0,0 +1,38 @@ +[ 
+ { + "title": "How to set_epoch with interleave_datasets?", + "html_url": "https://github.com/huggingface/datasets/issues/7051", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7051/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "add checkpoint and resume title in docs", + "html_url": "https://github.com/huggingface/datasets/pull/7050", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7050/comments", + "labels": "[]", + "state": "closed", + "pull_request": "{'diff_url': 'https://github.com/huggingface/datasets/pull/7050.diff', 'html_url': 'https://github.com/huggingface/datasets/pull/7050', 'merged_at': '2024-07-15T15:59:56Z', 'patch_url': 'https://github.com/huggingface/datasets/pull/7050.patch', 'url': 'https://api.github.com/repos/huggingface/datasets/pulls/7050'}", + "is_pull_request": true + }, + { + "title": "Save nparray as list", + "html_url": "https://github.com/huggingface/datasets/issues/7049", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7049/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + }, + { + "title": "ImportError: numpy.core.multiarray when using `filter`", + "html_url": "https://github.com/huggingface/datasets/issues/7048", + "comments_url": "https://api.github.com/repos/huggingface/datasets/issues/7048/comments", + "labels": "[]", + "state": "open", + "pull_request": "NaN", + "is_pull_request": false + } +] \ No newline at end of file diff --git a/examples/government/singletask_learning_bench/imgs/img.png b/examples/government/singletask_learning_bench/imgs/img.png new file mode 100644 index 00000000..e2d000f2 Binary files /dev/null and b/examples/government/singletask_learning_bench/imgs/img.png differ diff --git a/examples/llm_simple_qa/README.md b/examples/llm_simple_qa/README.md new file mode 100644 index 00000000..934bd79a --- /dev/null +++ b/examples/llm_simple_qa/README.md @@ -0,0 +1,54 @@ +# README + +## Simple QA + +### Prepare Data + +The data of simple-qa example structure is: + +``` +. +├── test_data +│ └── data.jsonl +└── train_data + └── data.jsonl +``` + +`train_data/data.jsonl` is empty, and the `test_data/data.jsonl` is as follows: + +``` +{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"} +{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"} +{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"} +{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"} +{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"} +{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"} +{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"} +{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"} +{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"} +{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 
三角形", "answer": "C"} +``` + +### Prepare Environment + +You need to install the changed-sedna package, which added `JsonlDataParse` in `sedna.datasources` + +Replace the file in `yourpath/anaconda3/envs/ianvs/lib/python3.x/site-packages/sedna` with `examples/resources/sedna-with-jsonl.zip` + + +### Run Ianvs + +Run the following command: + +`ianvs -f examples/llm/singletask_learning_bench/simple_qa/benchmarkingjob.yaml` + +## OpenCompass Evaluation + +### Prepare Environment + +`pip install examples/resources/opencompass-0.2.5-py3-none-any.whl` + +### Run Evaluation + +`python run_op.py examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/op_eval.py` + diff --git a/examples/llm_simple_qa/benchmarkingjob.yaml b/examples/llm_simple_qa/benchmarkingjob.yaml new file mode 100644 index 00000000..78961e52 --- /dev/null +++ b/examples/llm_simple_qa/benchmarkingjob.yaml @@ -0,0 +1,72 @@ +benchmarkingjob: + # job name of bechmarking; string type; + name: "benchmarkingjob" + # the url address of job workspace that will reserve the output of tests; string type; + workspace: "/home/icyfeather/project/ianvs/workspace" + + # the url address of test environment configuration file; string type; + # the file format supports yaml/yml; + testenv: "./examples/llm/singletask_learning_bench/simple_qa/testenv/testenv.yaml" + + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. + type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + # algorithm name; string type; + - name: "simple_qa_singletask_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml; + url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/gen_algorithm.yaml" + + # the configuration of ranking leaderboard + rank: + # rank leaderboard with metric of test case's evaluation and order ; list type; + # the sorting priority is based on the sequence of metrics in the list from front to back; + sort_by: [ { "acc": "descend" } ] + + # visualization configuration + visualization: + # mode of visualization in the leaderboard; string type; + # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. + # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. + mode: "selected_only" + # method of visualization for selected dataitems; string type; + # currently the options of value are as follows: + # 1> "print_table": print selected dataitems; + method: "print_table" + + # selected dataitem configuration + # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", + # so that the selected columns will be shown. 
+    selected_dataitem:
+      # currently the options are as follows:
+      # 1> "all": select all paradigms in the leaderboard;
+      # 2> paradigms in the leaderboard, e.g., "singletasklearning"
+      paradigms: [ "all" ]
+      # currently the options are as follows:
+      # 1> "all": select all modules in the leaderboard;
+      # 2> modules in the leaderboard, e.g., "basemodel"
+      modules: [ "all" ]
+      # currently the options are as follows:
+      # 1> "all": select all hyperparameters in the leaderboard;
+      # 2> hyperparameters in the leaderboard, e.g., "momentum"
+      hyperparameters: [ "all" ]
+      # currently the options are as follows:
+      # 1> "all": select all metrics in the leaderboard;
+      # 2> metrics in the leaderboard, e.g., "f1_score"
+      metrics: [ "acc" ]
+
+    # mode of saving selected and all dataitems in the workspace; string type;
+    # currently the options are as follows:
+    # 1> "selected_and_all": save selected and all dataitems;
+    # 2> "selected_only": save selected dataitems;
+    save_mode: "selected_and_all"
+
+
+
+
+
+
diff --git a/examples/llm_simple_qa/testalgorithms/data.jsonl b/examples/llm_simple_qa/testalgorithms/data.jsonl
new file mode 100644
index 00000000..abc96adb
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/data.jsonl
@@ -0,0 +1,23 @@
+{"question": "如果小明有5个苹果,他给了小华3个,那么小明还剩下多少个苹果?\nA. 2个\nB. 3个\nC. 4个\nD. 5个", "answer": "A"}
+{"question": "下列哪个数是最小的质数?\nA. 0\nB. 1\nC. 2\nD. 4", "answer": "C"}
+{"question": "一个长方形的长是10厘米,宽是5厘米,它的周长是多少厘米?\nA. 20厘米\nB. 30厘米\nC. 40厘米\nD. 50厘米", "answer": "B"}
+{"question": "下列哪个分数是最接近1的?\nA. 1/2\nB. 3/4\nC. 4/5\nD. 5/6", "answer": "D"}
+{"question": "如果一个数加上10等于30,那么这个数是多少?\nA. 20\nB. 21\nC. 22\nD. 23", "answer": "A"}
+{"question": "下列哪个算式的结果最大?\nA. 3 + 4\nB. 5 - 2\nC. 6 * 2\nD. 7 ÷ 2", "answer": "C"}
+{"question": "一个班级有24个学生,如果每个学生都带了2本书,那么总共有多少本书?\nA. 48本\nB. 36本\nC. 24本\nD. 12本", "answer": "A"}
+{"question": "下列哪个是正确的乘法口诀?\nA. 三三得七\nB. 四四十六\nC. 五五二十五\nD. 六六三十六", "answer": "B"}
+{"question": "如果一个数是另一个数的3倍,并且这个数是15,那么另一个数是多少?\nA. 5\nB. 10\nC. 15\nD. 45", "answer": "A"}
+{"question": "下列哪个图形的周长最长?\nA. 正方形\nB. 长方形\nC. 圆形\nD. 三角形", "answer": "C"}
+
+{"question": "如下是一个Python函数\"def wait(self, wait_time: int) -> list:all_ready = False\n while not all_ready:\n self._instances = self.get_instances()\n if not self._instances:\n self._logger.warning(\n f\\\"No instance found, waiting {wait_time}s ...\\\",\n )\n sleep(wait_time)\n continue\n all_ready = True\n for instance in self._instances:\n if not instance[\\\"health\\\"]:\n self._logger.warning(\n f\\\"Instance {instance['name']} is not ready, waiting {wait_time}s ...\\\",\n )\n sleep(wait_time)\n all_ready = False\n break\n return self._instances\",请问它的作用是什么?\nA. 确保服务或组件的所有实例都可用,然后再继续执行\nB. 函数的作用是在等待指定时间后,立即返回一个包含所有实例的列表,不检查实例的状态\nC. 函数的目的是为每个不健康的实例记录一次警告日志,不进行任何等待或重试\nD. 函数会检查每个实例的健康状态,如果所有实例在首次检查时都健康,就继续等待直到 wait_time 结束,然后返回实例列表", "answer": "A"}
+{"question": "如下是一个Python函数\"def _to_instances(self, controller_instance) -> List[dict]:\n instance = {}\n instance[\\\"name\\\"] = controller_instance.name\n instance[\\\"hostname\\\"] = controller_instance.name\n instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\n instance[\\\"env\\\"] = {}\n for env in controller_instance.attrs[\\\"Config\\\"][\\\"Env\\\"]:\n variable = env.split(\\\"=\\\")[0]\n value = env.replace(f\\\"{variable}=\\\", \\\"\\\", 1)\n instance[\\\"env\\\"][variable] = value\n return [instance]\",请问它的作用是什么?\nA. 函数用于修改控制器实例的属性,如名称和主机名\nB. 函数返回一个包含所有控制器实例属性的复杂嵌套结构\nC. 处理单个Docker容器实例并将其信息转换为字典\nD. 函数用于删除控制器实例的环境变量配置", "answer": "C"}
+{"question": "如下是一个Python函数\"instance[\\\"health\\\"] = controller_instance.status == \\\"running\\\" and controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"] == \\\"healthy\\\"\",请问它的作用是什么?\nA. 代码段会删除controller_instance中的status属性\nB. 多个属性组合起来判断实例的健康状态\nC. 如果controller_instance的状态为running,此代码将instance[\\\"health\\\"]设置为False\nD. 代码段检查controller_instance.attrs[\\\"State\\\"][\\\"Health\\\"][\\\"Status\\\"]的值是否为unhealthy,然后相应地更新instance[\\\"health\\\"]", "answer": "B"}
+{"question": "如下是一个Python函数\"result = self.__custom_confs_rx.search(variable)\",请问它的作用是什么?\nA. 这段代码的作用是将字符串 variable 与 self.__custom_confs_rx 进行替换操作\nB. 这段代码会删除 variable 中所有与 self.__custom_confs_rx 匹配的内容\nC. 这段代码用于计算 variable 和 self.__custom_confs_rx 的长度差\nD. 使用正则表达式匹配变量名,判断是否符合特定的配置项格式", "answer": "D"}
+{"question": "如下是一个Python函数\"if not self.update_needed(self._instances, self._services, configs=self._configs):\",请问它的作用是什么?\nA. 此函数用于更新类实例中的所有服务和配置\nB. 当 `update_needed` 方法返回 `True` 时,该代码片段将终止程序运行\nC. 调用update_needed方法检查当前配置是否需要更新\nD. 此代码片段用于直接修改 `_instances`、`_services` 和 `_configs` 的值", "answer": "C"}
+{"question": "如下是一个Python函数\"def set_value_from_polygon(self, pol_x, pol_y, val, inside=True):\n \\\"\\\"\\\"set_value_from_polygon\n Setting value inside or outside polygon\n :param pol_x: x position list for a polygon\n :param pol_y: y position list for a polygon\n :param val: grid value\n :param inside: setting data inside or outside\n \\\"\\\"\\\"\n # making ring polygon\n if (pol_x[0] != pol_x[-1]) or (pol_y[0] != pol_y[-1]):\n np.append(pol_x, pol_x[0])\n np.append(pol_y, pol_y[0])\n # setting value for all grid\n for x_ind in range(self.width):\n for y_ind in range(self.height):\n x_pos, y_pos = self.calc_grid_central_xy_position_from_xy_index(\n x_ind, y_ind)\n flag = this.check_inside_polygon(x_pos, y_pos, pol_x, pol_y)\n if flag is inside:\n this.set_value_from_xy_index(x_ind, y_ind, val)\",请问它的作用是什么?\nA. 根据多边形的形状设置网格地图中的值\nB. 该函数用于计算多边形的面积\nC. 该函数用于绘制多边形图形\nD. 该函数用于从多边形的顶点坐标生成一个新的多边形对象", "answer": "A"}
+{"question": "如下是一个Python函数\"if not check_car_collision(x_list, y_list, yaw_list, ox, oy, kd_tree): return None\",请问它的作用是什么?\nA. 检查生成的路径是否与障碍物冲突。\nB. 这个函数用来检查给定的列表中是否所有元素都相等\nC. 这个函数返回所有在x_list和y_list中的元素的和\nD. 这个函数用于创建一个新的kd树来存储车辆位置数据", "answer": "A"}
+{"question": "如下是一个Python函数\"heapq.heappush(pq, (calc_cost(start_node, h_dp, config), calc_index(start_node, config)))\",请问它的作用是什么?\nA. 该函数从堆`pq`中删除一个元素\nB. 将节点添加到优先级队列(使用堆数据结构实现)\nC. 该函数返回堆`pq`中的最大元素\nD. 该函数用于创建一个新的空堆", "answer": "B"}
+{"question": "如下是一个Python函数\"def calc_index(node, x_width, x_min, y_min): return (node.y - y_min) * x_width + (node.x - x_min)\",请问它的作用是什么?\nA. 将节点坐标转换为一维索引\nB. 函数用于计算节点在二维网格中的行索引\nC. 函数返回的是从给定节点到最小节点的直线距离\nD. 函数用于计算节点的颜色值在一个色彩数组中的索引", "answer": "A"}
+{"question": "如下是一个Python函数\"if use_dynamic_weighting: w = (1 + epsilon - epsilon*depth/upper_bound_depth)\",请问它的作用是什么?\nA. 该函数用于重置 `w` 的值为固定常数\nB. 代码段检查 `depth` 是否大于 `upper_bound_depth`\nC. 调整启发式成本的计算,引入动态权重,优化搜索效率\nD. 该函数将 `w` 的值与 `depth` 成正比增加", "answer": "C"}
+
+
diff --git a/examples/llm_simple_qa/testalgorithms/gen/basemodel.py b/examples/llm_simple_qa/testalgorithms/gen/basemodel.py
new file mode 100644
index 00000000..fdeedc98
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/gen/basemodel.py
@@ -0,0 +1,98 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import, division, print_function
+
+import os
+import logging
+
+from sedna.common.class_factory import ClassType, ClassFactory
+
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+device = "cuda"  # the device to load the model onto
+
+logging.disable(logging.WARNING)
+
+__all__ = ["BaseModel"]
+
+os.environ['BACKEND_TYPE'] = 'TORCH'
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="gen")
+class BaseModel:
+    """Single-task learning base model wrapping a local Qwen2 chat model."""
+
+    def __init__(self, **kwargs):
+        # Load the chat model and tokenizer from a local checkpoint
+        self.model = AutoModelForCausalLM.from_pretrained(
+            "/home/icyfeather/models/Qwen2-0.5B-Instruct",
+            torch_dtype="auto",
+            device_map="auto"
+        )
+        self.tokenizer = AutoTokenizer.from_pretrained("/home/icyfeather/models/Qwen2-0.5B-Instruct")
+
+    def train(self, train_data, valid_data=None, **kwargs):
+        print("BaseModel doesn't need to train")
+
+    def save(self, model_path):
+        print("BaseModel doesn't need to save")
+
+    def predict(self, data, input_shape=None, **kwargs):
+        print("BaseModel predict")
+        answer_list = []
+        for line in data:
+            response = self._infer(line)
+            answer_list.append(response)
+        return answer_list
+
+    def load(self, model_url=None):
+        print("BaseModel load")
+
+    def evaluate(self, data, model_path, **kwargs):
+        print("BaseModel evaluate")
+
+    def _infer(self, prompt, system=None):
+        # Build the chat messages, optionally with a system prompt
+        if system:
+            messages = [
+                {"role": "system", "content": system},
+                {"role": "user", "content": prompt}
+            ]
+        else:
+            messages = [
+                {"role": "user", "content": prompt}
+            ]
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,
+            add_generation_prompt=True
+        )
+        model_inputs = self.tokenizer([text], return_tensors="pt").to(device)
+
+        generated_ids = self.model.generate(
+            model_inputs.input_ids,
+            max_new_tokens=512
+        )
+        # Strip the prompt tokens so only the newly generated answer remains
+        generated_ids = [
+            output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+        ]
+
+        response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+        return response
diff --git a/examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml b/examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml
new file mode 100644
index 00000000..6536ceb9
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/gen/gen_algorithm.yaml
@@ -0,0 +1,18 @@
+algorithm:
+  # paradigm name; string type;
+  # currently the options are as follows:
+  # 1> "singletasklearning"
+  # 2> "incrementallearning"
+  paradigm_type: "singletasklearning"
+
+  # algorithm module configuration in the paradigm; list type;
+  modules:
+    # kind of algorithm module; string type;
+    # currently the options are as follows:
+    # 1> "basemodel"
+    - type: "basemodel"
+      # name of the python module; string type;
+      # example: basemodel.py has a BaseModel class whose alias is "gen" for this benchmarking;
+      name: "gen"
+      # the url address of the python module; string type;
+      url: "./examples/llm/singletask_learning_bench/simple_qa/testalgorithms/gen/basemodel.py"
\ No newline at end of file
diff --git a/examples/llm_simple_qa/testalgorithms/gen/op_eval.py b/examples/llm_simple_qa/testalgorithms/gen/op_eval.py
new file mode 100644
index 00000000..dc6d9c04
--- /dev/null
+++ b/examples/llm_simple_qa/testalgorithms/gen/op_eval.py
@@ -0,0 +1,21 @@
+from mmengine.config import read_base
+from opencompass.models import HuggingFacewithChatTemplate
+# import sys
+# sys.path.append('/home/icyfeather/project/ianvs')
+
+with read_base():
+    from core.op_extra.datasets.cmmlu.cmmlu_gen import cmmlu_datasets
+
+datasets = [*cmmlu_datasets]
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='qwen1.5-1.8b-chat-hf',
+        path='/home/icyfeather/models/Qwen1.5-1.8B-Chat',
+        max_out_len=1024,
+        batch_size=2,
+        run_cfg=dict(num_gpus=1),
+        stop_words=['<|im_end|>', '<|im_start|>'],
+    )
+]
diff --git a/examples/llm_simple_qa/testenv/acc.py b/examples/llm_simple_qa/testenv/acc.py
new file mode 100644
index 00000000..3285a93a
--- /dev/null
+++ b/examples/llm_simple_qa/testenv/acc.py
@@ -0,0 +1,45 @@
+# Copyright 2022 The KubeEdge Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
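+
+# Metric sketch: each model prediction is reduced to its final multiple-choice
+# letter (A-D) by get_last_letter below, then compared element-wise with the
+# ground-truth answers; accuracy is the fraction of exact matches.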
+
+from sedna.common.class_factory import ClassType, ClassFactory
+
+__all__ = ["acc"]
+
+def get_last_letter(input_string):
+    # Check whether the input is empty or contains only non-letter characters
+    if not input_string or not any(char.isalpha() for char in input_string):
+        return None
+
+    # Traverse the string in reverse and return the last option letter (A-D)
+    for char in reversed(input_string):
+        if 'A' <= char <= 'D':
+            return char
+
+    # Return None if no option letter was found
+    return None
+
+
+@ClassFactory.register(ClassType.GENERAL, alias="acc")
+def acc(y_true, y_pred):
+    y_pred = [get_last_letter(pred) for pred in y_pred]
+    print(y_true)
+    print(y_pred)
+
+    # Use a list comprehension to compare the elements of the two lists
+    same_elements = [y_pred[i] == y_true[i] for i in range(len(y_pred))]
+
+    # Compute the proportion of matching elements
+    acc = sum(same_elements) / len(same_elements)
+
+    return acc
diff --git a/examples/llm_simple_qa/testenv/testenv.yaml b/examples/llm_simple_qa/testenv/testenv.yaml
new file mode 100644
index 00000000..0bc7239f
--- /dev/null
+++ b/examples/llm_simple_qa/testenv/testenv.yaml
@@ -0,0 +1,14 @@
+testenv:
+  # dataset configuration
+  dataset:
+    # the url address of the train dataset index; string type;
+    train_data: "/home/icyfeather/Projects/ianvs/dataset/llm_simple_qa/train_data/data.jsonl"
+    # the url address of the test dataset index; string type;
+    test_data: "/home/icyfeather/Projects/ianvs/dataset/llm_simple_qa/test_data/data.jsonl"
+
+  # metrics configuration for the test case's evaluation; list type;
+  metrics:
+    # metric name; string type;
+    - name: "acc"
+      # the url address of the python file
+      url: "./examples/llm/singletask_learning_bench/simple_qa/testenv/acc.py"
diff --git a/examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/op_eval.py b/examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/op_eval.py
new file mode 100644
index 00000000..d916f64a
--- /dev/null
+++ b/examples/smart_coding/smart_coding_learning_bench/comment/testalgorithms/gen/op_eval.py
@@ -0,0 +1,21 @@
+from mmengine.config import read_base
+from opencompass.models import HuggingFacewithChatTemplate
+# import sys
+# sys.path.append('/home/icyfeather/project/ianvs')
+
+with read_base():
+    from core.op_extra.datasets.cmmlu.cmmlu_gen import cmmlu_datasets
+
+datasets = [*cmmlu_datasets]
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='qwen2.5-1.5b-chat-hf',
+        path='/root/autodl-tmp/Qwen2.5-Coder-1.5B-Instruct',
+        max_out_len=1024,
+        batch_size=2,
+        run_cfg=dict(num_gpus=1),
+        stop_words=['<|im_end|>', '<|im_start|>'],
+    )
+]
diff --git a/examples/smart_coding/smart_coding_learning_bench/issue/testalgorithms/gen/op_eval.py b/examples/smart_coding/smart_coding_learning_bench/issue/testalgorithms/gen/op_eval.py
new file mode 100644
index 00000000..d916f64a
--- /dev/null
+++ b/examples/smart_coding/smart_coding_learning_bench/issue/testalgorithms/gen/op_eval.py
@@ -0,0 +1,21 @@
+from mmengine.config import read_base
+from opencompass.models import HuggingFacewithChatTemplate
+# import sys
+# sys.path.append('/home/icyfeather/project/ianvs')
+
+with read_base():
+    from core.op_extra.datasets.cmmlu.cmmlu_gen import cmmlu_datasets
+
+datasets = [*cmmlu_datasets]
+
+models = [
+    dict(
+        type=HuggingFacewithChatTemplate,
+        abbr='qwen2.5-1.5b-chat-hf',
+        path='/root/autodl-tmp/Qwen2.5-Coder-1.5B-Instruct',
+        max_out_len=1024,
+        batch_size=2,
+        run_cfg=dict(num_gpus=1),
+        stop_words=['<|im_end|>', '<|im_start|>'],
+    )
+]
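+
+# Usage sketch (assumption: run_op.py is the OpenCompass launcher referenced in
+# examples/llm_simple_qa/README.md; the config path is adjusted for this file):
+#   python run_op.py examples/smart_coding/smart_coding_learning_bench/issue/testalgorithms/gen/op_eval.py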